{ "data_id": "43492", "name": "Kaggle-Survey-2017-2020-Merged-Data", "exact_name": "Kaggle-Survey-2017-2020-Merged-Data", "version": 1, "version_label": "v1.0", "description": "Context\nEvery year Kaggle conducts an industry-wide survey that presents a truly comprehensive view of the state of data science and machine learning. This dataset combines the data from the past 4 years (2017-2020).\nContent\nData was acquired and cleaned by Kaggle Team. I merged the dataset over the years using the notebook. https:\/\/www.kaggle.com\/harveenchadha\/merging-all-historical-survey-data-2017-2020", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:27:41", "update_comment": null, "last_update": "2022-03-23 13:27:41", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102317\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Kaggle-Survey-2017-2020-Merged-Data", "Context Every year Kaggle conducts an industry-wide survey that presents a truly comprehensive view of the state of data science and machine learning. This dataset combines the data from the past 4 years (2017-2020). Content Data was acquired and cleaned by Kaggle Team. I merged the dataset over the years using the notebook. https:\/\/www.kaggle.com\/harveenchadha\/merging-all-historical-survey-data-2017-2020 " ], "weight": 5 }, "qualities": { "NumberOfInstances": 80327, "NumberOfFeatures": 12, "NumberOfClasses": null, "NumberOfMissingValues": 226439, "NumberOfInstancesWithMissingValues": 57507, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.00014938937094625718, "PercentageOfNumericFeatures": 16.666666666666664, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 71.5911212917201, "AutoCorrelation": null, "PercentageOfMissingValues": 23.49137483868023 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "index", "index": "0", "type": "numeric", "distinct": "80327", "missing": "0", "min": "0", "max": "80326", "mean": "40163", "stdev": "23189" }, { "name": "Age", "index": "1", "type": "string", "distinct": "11", "missing": "444" }, { "name": "Gender", "index": "2", "type": "string", "distinct": "5", "missing": "95" }, { "name": "Country", "index": "3", "type": "string", "distinct": "72", "missing": "120" }, { "name": "Degree", "index": "4", "type": "string", "distinct": "7", "missing": "2983" }, { "name": "Job_Title", "index": "5", "type": "string", "distinct": "32", "missing": "7214" }, { "name": "Company_Size", "index": "6", "type": "string", "distinct": "7", "missing": "47152" }, { "name": "Team_Size", "index": "7", "type": "string", "distinct": "7", "missing": "55422" }, { "name": "ML_Status_in_Company", "index": "8", "type": "string", "distinct": "6", "missing": "35301" }, { "name": "Compensation_Status", "index": "9", "type": "string", "distinct": "18", "missing": "20201" }, { "name": "Money_Spent", "index": "10", "type": "string", "distinct": "7",
"missing": "57507" }, { "name": "Year", "index": "11", "type": "numeric", "distinct": "4", "missing": "0", "min": "2017", "max": "2020", "mean": "2019", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }