{ "data_id": "43360", "name": "Popular-Halloween-2020--Costumes-Amazon-Reviews", "exact_name": "Popular-Halloween-2020--Costumes-Amazon-Reviews", "version": 1, "version_label": "v1.0", "description": "Context\nSo it's Halloween again dear Kagglers!\nAnd what better way of celebrating than with some NLP!\nThe dataset brings you the reviews of popular Halloween costumes sold on amazon as of November 2020.\nContent\nThe dataset contains popular costumes from the Amazon website, for each costume there are user review texts including the review title and the review score, also you will find the publishing date and location.\nThe data hasn't been preprocessed in any way so I think it can be a great exercise for aspiring data scientists who are looking to sharpen their skills in text preprocessing skills and feature extraction skills.", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:39:27", "update_comment": null, "last_update": "2022-03-23 12:39:27", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102185\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Popular-Halloween-2020--Costumes-Amazon-Reviews", "Context So it's Halloween again dear Kagglers! And what better way of celebrating than with some NLP! The dataset brings you the reviews of popular Halloween costumes sold on amazon as of November 2020. Content The dataset contains popular costumes from the Amazon website, for each costume there are user review texts including the review title and the review score, also you will find the publishing date and location. The data hasn't been preprocessed in any way so I think it can be a great exerc " ], "weight": 5 }, "qualities": { "NumberOfInstances": 7814, "NumberOfFeatures": 5, "NumberOfClasses": null, "NumberOfMissingValues": 16, "NumberOfInstancesWithMissingValues": 16, "NumberOfNumericFeatures": 1, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.000639877143588431, "PercentageOfNumericFeatures": 20, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0.2047606859482979, "AutoCorrelation": null, "PercentageOfMissingValues": 0.040952137189659586 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Mathematics" } ], "features": [ { "name": "text", "index": "0", "type": "string", "distinct": "7666", "missing": "0" }, { "name": "date", "index": "1", "type": "string", "distinct": "2247", "missing": "0" }, { "name": "title", "index": "2", "type": "string", "distinct": "5522", "missing": "16" }, { "name": "rating", "index": "3", "type": "numeric", "distinct": "5", "missing": "0", "min": "1", "max": "5", "mean": "4", "stdev": "1" }, { "name": "product_name", "index": "4", "type": "string", "distinct": "73", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }