{ "data_id": "43673", "name": "Womens-E-Commerce-Clothing-Reviews", "exact_name": "Womens-E-Commerce-Clothing-Reviews", "version": 2, "version_label": "v1.0", "description": "Context\nWelcome. This is a Womens Clothing E-Commerce dataset revolving around the reviews written by customers. Its nine supportive features offer a great environment to parse out the text through its multiple dimensions. Because this is real commercial data, it has been anonymized, and references to the company in the review text and body have been replaced with retailer.\nContent\nThis dataset includes 23486 rows and 10 feature variables. Each row corresponds to a customer review, and includes the variables:\n\nClothing ID: Integer Categorical variable that refers to the specific piece being reviewed. \nAge: Positive Integer variable of the reviewers age.\nTitle: String variable for the title of the review.\nReview Text: String variable for the review body. \nRating: Positive Ordinal Integer variable for the product score granted by the customer from 1 Worst, to 5 Best. \nRecommended IND: Binary variable stating where the customer recommends the product where 1 is recommended, 0 is not recommended. \nPositive Feedback Count: Positive Integer documenting the number of other customers who found this review positive.\nDivision Name: Categorical name of the product high level division.\nDepartment Name: Categorical name of the product department name.\nClass Name: Categorical name of the product class name.\n\nAcknowledgements\nAnonymous but real source\nInspiration\nI look forward to come quality NLP! There is also some great opportunities for feature engineering, and multivariate analysis.\nPublications\nStatistical Analysis on E-Commerce Reviews, with Sentiment Classification using Bidirectional Recurrent Neural Network \nby Abien Fred Agarap - Github", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 07:07:43", "update_comment": null, "last_update": "2022-03-24 07:07:43", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102498\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Womens-E-Commerce-Clothing-Reviews", "Context Welcome. This is a Womens Clothing E-Commerce dataset revolving around the reviews written by customers. Its nine supportive features offer a great environment to parse out the text through its multiple dimensions. Because this is real commercial data, it has been anonymized, and references to the company in the review text and body have been replaced with retailer. Content This dataset includes 23486 rows and 10 feature variables. Each row corresponds to a customer review, and includes " ], "weight": 5 }, "qualities": { "NumberOfInstances": 23486, "NumberOfFeatures": 11, "NumberOfClasses": null, "NumberOfMissingValues": 4697, "NumberOfInstancesWithMissingValues": 3824, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.00046836413182321383, "PercentageOfNumericFeatures": 54.54545454545454, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 16.282040364472454, "AutoCorrelation": null, "PercentageOfMissingValues": 1.8181044026228392 }, "tags": [ { "uploader": "38960", "tag": "Health" }, { "uploader": "38960", "tag": "Medicine" } ], "features": [ { "name": "Unnamed:_0", "index": "0", "type": "numeric", "distinct": "23486", "missing": "0", "min": "0", "max": "23485", "mean": "11743", "stdev": "6780" }, { "name": "Clothing_ID", "index": "1", "type": "numeric", "distinct": "1206", "missing": "0", "min": "0", "max": "1205", "mean": "918", "stdev": "203" }, { "name": "Age", "index": "2", "type": "numeric", "distinct": "77", "missing": "0", "min": "18", "max": "99", "mean": "43", "stdev": "12" }, { "name": "Title", "index": "3", "type": "string", "distinct": "13992", "missing": "3810" }, { "name": "Review_Text", "index": "4", "type": "string", "distinct": "22634", "missing": "845" }, { "name": "Rating", "index": "5", "type": "numeric", "distinct": "5", "missing": "0", "min": "1", "max": "5", "mean": "4", "stdev": "1" }, { "name": "Recommended_IND", "index": "6", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "Positive_Feedback_Count", "index": "7", "type": "numeric", "distinct": "82", "missing": "0", "min": "0", "max": "122", "mean": "3", "stdev": "6" }, { "name": "Division_Name", "index": "8", "type": "string", "distinct": "3", "missing": "14" }, { "name": "Department_Name", "index": "9", "type": "string", "distinct": "6", "missing": "14" }, { "name": "Class_Name", "index": "10", "type": "string", "distinct": "20", "missing": "14" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }