{ "data_id": "41275", "name": "wine-reviews", "exact_name": "wine-reviews", "version": 1, "version_label": null, "description": "130k wine reviews with variety, location, winery, price, and description. Downloaded from Kaggle [https:\/\/www.kaggle.com\/zynicide\/wine-reviews\/home] on 29.10.2018. The original data was scraped from the WineEnthusiast homepage [https:\/\/www.winemag.com\/?s=&drink_type=wine]. The second version of the dataset was used, which was scraped on 22.11.2017. The Kaggle dataset was licensed under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) [https:\/\/creativecommons.org\/licenses\/by-nc-sa\/4.0\/]. The variable 'points' (the number of points WineEnthusiast rated the wine on a scale of 1-100) was selected as target variable. For a description of all variables, checkout the Kaggle dataset repo. The variable 'region_2' is ignored by default as it contains a large portion of missing values. The variable 'designation' is not used by default, as the number of factor labels is extremely high compared to the number of observations. The dataset further includes the text based variables 'description', 'taster_twitter_handle', and 'title' (ignored by default) which could be used to construct additional features. Special characters in text features have been removed to allow the upload to the platform. The ID variable from the Kaggle version was removed from the dataset. The factor labels of all nominal features had to be changed to integers to prevent a problem which would not allow the upload of nominal features with too many and too long labels.", "format": "ARFF", "uploader": "Florian Pargent", "uploader_id": 1475, "visibility": "public", "creator": null, "contributor": null, "date": "2018-11-12 14:46:51", "update_comment": null, "last_update": "2018-11-12 14:46:51", "licence": "CC BY-NC-SA 4.0", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/20649219\/file4292b2228a8.arff", "default_target_attribute": "points", "row_id_attribute": null, "ignore_attribute": "\"region_2,designation,description,taster_twitter_handle,title\"", "runs": 0, "suggest": { "input": [ "wine-reviews", "130k wine reviews with variety, location, winery, price, and description. Downloaded from Kaggle [https:\/\/www.kaggle.com\/zynicide\/wine-reviews\/home] on 29.10.2018. The original data was scraped from the WineEnthusiast homepage [https:\/\/www.winemag.com\/?s=&drink_type=wine]. The second version of the dataset was used, which was scraped on 22.11.2017. The Kaggle dataset was licensed under Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) [https:\/\/creativecomm " ], "weight": 5 }, "qualities": { "NumberOfInstances": 129971, "NumberOfFeatures": 13, "NumberOfClasses": 0, "NumberOfMissingValues": 204752, "NumberOfInstancesWithMissingValues": 107584, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 6, "AutoCorrelation": 0.8414018619681465, "MeanMutualInformation": null, "Quartile1StdDevOfNumericAtts": 3.039730202916243, "ClassEntropy": null, "MeanNoiseToSignalRatio": null, "Quartile2AttributeEntropy": null, "Dimensionality": 0.00010002231266974941, "MeanNominalAttDistinctValues": 3196.666666666667, "Quartile2KurtosisOfNumericAtts": 414.6121091629397, "EquivalentNumberOfAtts": null, "MeanSkewnessOfNumericAtts": 9.02343908416189, "Quartile2MeansOfNumericAtts": 61.90526366886985, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 22.03097393550256, "Quartile2MutualInformation": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2SkewnessOfNumericAtts": 9.023439084161888, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -0.2959631898416748, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 22.03097393550256, "MaxKurtosisOfNumericAtts": 829.5201815157211, "MinMeansOfNumericAtts": 35.363389129985684, "PercentageOfInstancesWithMissingValues": 82.775388355864, "Quartile3AttributeEntropy": null, "MaxMeansOfNumericAtts": 88.44713820775402, "MinMutualInformation": null, "PercentageOfMissingValues": 12.118206250743508, "Quartile3KurtosisOfNumericAtts": 829.5201815157211, "MaxMutualInformation": null, "MinNominalAttDistinctValues": 19, "PercentageOfNumericFeatures": 15.384615384615385, "Quartile3MeansOfNumericAtts": 88.44713820775402, "MaxNominalAttDistinctValues": 16757, "MinSkewnessOfNumericAtts": 0.045920752455346665, "PercentageOfSymbolicFeatures": 46.15384615384615, "Quartile3MutualInformation": null, "MaxSkewnessOfNumericAtts": 18.00095741586843, "MinStdDevOfNumericAtts": 3.039730202916243, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 18.00095741586843, "MaxStdDevOfNumericAtts": 41.02221766808888, "MinorityClassPercentage": null, "Quartile1KurtosisOfNumericAtts": -0.2959631898416748, "Quartile3StdDevOfNumericAtts": 41.02221766808888, "MeanAttributeEntropy": null, "MinorityClassSize": null, "Quartile1MeansOfNumericAtts": 35.363389129985684, "StdvNominalAttDistinctValues": 6658.499941177943, "MeanKurtosisOfNumericAtts": 414.6121091629397, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": null, "MeanMeansOfNumericAtts": 61.90526366886985, "Quartile1SkewnessOfNumericAtts": 0.045920752455346665 }, "tags": [ { "tag": "Manufacturing", "uploader": "38960" }, { "tag": "Social Media", "uploader": "38960" } ], "features": [ { "name": "points", "index": "3", "type": "numeric", "distinct": "21", "missing": "0", "target": "1", "min": "80", "max": "100", "mean": "88", "stdev": "3" }, { "name": "country", "index": "0", "type": "nominal", "distinct": "43", "missing": "63", "distr": [] }, { "name": "description", "index": "1", "type": "string", "distinct": "119951", "missing": "0" }, { "name": "designation", "index": "2", "type": "string", "distinct": "37526", "missing": "37465" }, { "name": "price", "index": "4", "type": "numeric", "distinct": "390", "missing": "8996", "min": "4", "max": "3300", "mean": "35", "stdev": "41" }, { "name": "province", "index": "5", "type": "nominal", "distinct": "425", "missing": "63", "distr": [] }, { "name": "region_1", "index": "6", "type": "nominal", "distinct": "1229", "missing": "21247", "distr": [] }, { "name": "region_2", "index": "7", "type": "string", "distinct": "17", "missing": "79460" }, { "name": "taster_name", "index": "8", "type": "nominal", "distinct": "19", "missing": "26244", "distr": [] }, { "name": "taster_twitter_handle", "index": "9", "type": "string", "distinct": "15", "missing": "31213" }, { "name": "title", "index": "10", "type": "string", "distinct": "118815", "missing": "0" }, { "name": "variety", "index": "11", "type": "nominal", "distinct": "707", "missing": "1", "distr": [] }, { "name": "winery", "index": "12", "type": "nominal", "distinct": "16757", "missing": "0", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 1, "total_downloads": 1, "reach": 1, "reuse": 9, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 9 }