{ "data_id": "43473", "name": "Hottest-Kaggle-Datasets", "exact_name": "Hottest-Kaggle-Datasets", "version": 1, "version_label": "v1.0", "description": "Context\nThis data was collected as a course project for the immersive data science course (by General Assembly and Misk Academy).\nContent\nThis dataset is in a CSV format, it consists of 5717 rows and 15 columns, where each row is a dataset on Kaggle and each column represents a feature of that dataset.\n\ntitle dataset name\nusability dataset usability rating by Kaggle\nnumoffiles number of files associated with the dataset\ntypesoffiles types of files associated with the dataset\nfiles_size size of the dataset files\nvote_counts total votes count by the dataset viewer\nmedal reward to popular datasets measured by the number of upvotes (votes by novices are excluded from medal calculation), [Bronze = 5 Votes, Silver = 20 Votes, Gold = 50 Votes]\nurl_reference reference to the dataset page on Kaggle in the format: www.kaggle.com\/url_reference\nkeywords Topics tagged with the dataset\nnumofcolumns number of features in the dataset\nviews number of views\ndownloads number of downloads\ndownloadperview download per view ratio\ndate_created dataset creation date\nlast_updated date of the last update\n\nAcknowledgements\nI would like to thank all my GA instructors for their continuous help and support\nAll data were taken from https:\/\/www.kaggle.com , collected on 30 Jan 2021 \nInspiration\nUsing this dataset, we could try to predict the upcoming datasets uploaded, number of votes, number of downloads, medal type, etc.", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:25:00", "update_comment": null, "last_update": "2022-03-23 13:25:00", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102298\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Hottest-Kaggle-Datasets", "Context This data was collected as a course project for the immersive data science course (by General Assembly and Misk Academy). Content This dataset is in a CSV format, it consists of 5717 rows and 15 columns, where each row is a dataset on Kaggle and each column represents a feature of that dataset. title dataset name usability dataset usability rating by Kaggle numoffiles number of files associated with the dataset typesoffiles types of files associated with the dataset files_size size of th " ], "weight": 5 }, "qualities": { "NumberOfInstances": 5717, "NumberOfFeatures": 15, "NumberOfClasses": null, "NumberOfMissingValues": 4146, "NumberOfInstancesWithMissingValues": 3006, "NumberOfNumericFeatures": 7, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0026237537169844323, "PercentageOfNumericFeatures": 46.666666666666664, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 52.580024488368025, "AutoCorrelation": null, "PercentageOfMissingValues": 4.834703515829981 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "title", "index": "0", "type": "string", "distinct": "5658", "missing": "1" }, { "name": "usability", "index": "1", "type": "numeric", "distinct": "39", "missing": "0", "min": "0", "max": "10", "mean": "7", "stdev": "2" }, { "name": "num_of_files", "index": "2", "type": "numeric", "distinct": "873", "missing": "333", "min": "1", "max": "3999566", "mean": "5744", "stdev": "66487" }, { "name": "types_of_files", "index": "3", "type": "string", "distinct": "26", "missing": "267" }, { "name": "files_size", "index": "4", "type": "string", "distinct": "1631", "missing": "66" }, { "name": "vote_counts", "index": "5", "type": "numeric", "distinct": "499", "missing": "0", "min": "1", "max": "9031", "mean": "76", "stdev": "277" }, { "name": "medal", "index": "6", "type": "string", "distinct": "4", "missing": "0" }, { "name": "url_reference", "index": "7", "type": "string", "distinct": "5710", "missing": "0" }, { "name": "keywords", "index": "8", "type": "string", "distinct": "3565", "missing": "618" }, { "name": "num_of_columns", "index": "9", "type": "numeric", "distinct": "201", "missing": "2668", "min": "0", "max": "7731", "mean": "41", "stdev": "239" }, { "name": "views", "index": "10", "type": "numeric", "distinct": "4532", "missing": "1", "min": "2", "max": "8305532", "mean": "25488", "stdev": "139730" }, { "name": "downloads", "index": "11", "type": "numeric", "distinct": "2595", "missing": "1", "min": "0", "max": "295813", "mean": "3052", "stdev": "11581" }, { "name": "download_per_view", "index": "12", "type": "numeric", "distinct": "112", "missing": "1", "min": "0", "max": "2", "mean": "0", "stdev": "0" }, { "name": "date_created", "index": "13", "type": "string", "distinct": "1447", "missing": "95" }, { "name": "last_updated", "index": "14", "type": "string", "distinct": "1379", "missing": "95" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }