{ "data_id": "379", "name": "SyskillWebert-Goats", "exact_name": "SyskillWebert-Goats", "version": 1, "version_label": null, "description": "**Author**: Michael Pazzani (pazzani@ics.uci.edu) \n**Source**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Syskill+and+Webert+Web+Page+Ratings)- 1999 \n**Please cite**: \n\n**Syskill and Webert Web Page Ratings** \nThis database contains the HTML source of web pages plus the ratings of a single user on these web pages. The web pages are on four separate subjects (Bands- recording artists; Goats; Sheep; and BioMedical)\n\nThe HTML source of a web page is given. Users looked at each web page and indicated on a 3 point scale (hot medium cold) 50-100 pages per domain. However, this is realistic because we want to learn user profiles from as few examples as possible so that users have an incentive to rate pages.\n\nThe problem is to predict user ratings for web pages (within a subject category). The accuracy of predicting ratings is reported in early publications. Later publications used the precision at top N or the F-measure.\n\n**Past Usage** \nPazzani M., Billsus, D. (1997). Learning and Revising User Profiles: The identification of interesting web sites. Machine Learning 27, 313-331\n\nPazzani, M., Muramatsu J., Billsus, D. (1996). Syskill & Webert: Identifying interesting web sites. Proceedings of the National Conference on Artificial Intelligence, Portland, OR.", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": "Michael Pazzani", "contributor": null, "date": "2014-09-27 10:56:13", "update_comment": "Converted from latin to utf-8", "last_update": "2015-08-25 11:19:07", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/1663742\/phpUAQns6", "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "SyskillWebert-Goats", "This database contains the HTML source of web pages plus the ratings of a single user on these web pages. The web pages are on four separate subjects (Bands- recording artists; Goats; Sheep; and BioMedical) The HTML source of a web page is given. Users looked at each web page and indicated on a 3 point scale (hot medium cold) 50-100 pages per domain. However, this is realistic because we want to learn user profiles from as few examples as possible so that users have an incentive to rate pages. T " ], "weight": 5 }, "qualities": { "NumberOfInstances": 70, "NumberOfFeatures": 3, "NumberOfClasses": 3, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 0, "NumberOfSymbolicFeatures": 1, "Quartile2KurtosisOfNumericAtts": null, "REPTreeDepth2Kappa": null, "ClassEntropy": 1.0900005236839503, "kNN1NKappa": null, "MajorityClassSize": 37, "MinAttributeEntropy": null, "Quartile2MeansOfNumericAtts": null, "REPTreeDepth3AUC": null, "DecisionStumpAUC": null, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": null, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": null, "DecisionStumpErrRate": null, "MaxKurtosisOfNumericAtts": null, "MinMeansOfNumericAtts": null, "Quartile2SkewnessOfNumericAtts": null, "REPTreeDepth3Kappa": null, "DecisionStumpKappa": null, "MaxMeansOfNumericAtts": null, "MinMutualInformation": null, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": null, "RandomTreeDepth1AUC": null, "Dimensionality": 0.04285714285714286, "MaxMutualInformation": null, "MinNominalAttDistinctValues": 3, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": null, "EquivalentNumberOfAtts": null, "MaxNominalAttDistinctValues": 3, "MinSkewnessOfNumericAtts": null, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": null, "AutoCorrelation": 0.4927536231884058, "RandomTreeDepth1Kappa": null, "J48.00001.AUC": null, "MaxSkewnessOfNumericAtts": null, "MinStdDevOfNumericAtts": null, "PercentageOfNumericFeatures": 0, "Quartile3MeansOfNumericAtts": null, "CfsSubsetEval_DecisionStumpAUC": null, "RandomTreeDepth2AUC": null, "J48.00001.ErrRate": null, "MaxStdDevOfNumericAtts": null, "MinorityClassPercentage": 1.4285714285714286, "PercentageOfSymbolicFeatures": 33.33333333333333, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": null, "RandomTreeDepth2ErrRate": null, "J48.00001.Kappa": null, "MeanAttributeEntropy": null, "MinorityClassSize": 1, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": null, "CfsSubsetEval_DecisionStumpKappa": null, "RandomTreeDepth2Kappa": null, "J48.0001.AUC": null, "MeanKurtosisOfNumericAtts": null, "NaiveBayesAUC": null, "Quartile1KurtosisOfNumericAtts": null, "Quartile3StdDevOfNumericAtts": null, "CfsSubsetEval_NaiveBayesAUC": null, "RandomTreeDepth3AUC": null, "J48.0001.ErrRate": null, "MeanMeansOfNumericAtts": null, "NaiveBayesErrRate": null, "Quartile1MeansOfNumericAtts": null, "REPTreeDepth1AUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "RandomTreeDepth3ErrRate": null, "J48.0001.Kappa": null, "MeanMutualInformation": null, "NaiveBayesKappa": null, "Quartile1MutualInformation": null, "REPTreeDepth1ErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "RandomTreeDepth3Kappa": null, "J48.001.AUC": null, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 0, "Quartile1SkewnessOfNumericAtts": null, "REPTreeDepth1Kappa": null, "CfsSubsetEval_kNN1NAUC": null, "StdvNominalAttDistinctValues": 0, "J48.001.ErrRate": null, "MeanNominalAttDistinctValues": 3, "Quartile1StdDevOfNumericAtts": null, "REPTreeDepth2AUC": null, "CfsSubsetEval_kNN1NErrRate": null, "kNN1NAUC": null, "J48.001.Kappa": null, "MeanSkewnessOfNumericAtts": null, "Quartile2AttributeEntropy": null, "REPTreeDepth2ErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "kNN1NErrRate": null, "MajorityClassPercentage": 52.85714285714286, "MeanStdDevOfNumericAtts": null }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "9180", "tag": "uci" } ], "features": [ { "name": "class", "index": "2", "type": "nominal", "distinct": "3", "missing": "0", "target": "1", "distr": [ [ "cold", "hot", "medium" ], [ [ "37", "0", "0" ], [ "0", "32", "0" ], [ "0", "0", "1" ] ] ] }, { "name": "id", "index": "0", "type": "string", "distinct": "70", "missing": "0" }, { "name": "text", "index": "1", "type": "string", "distinct": "68", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }