{ "data_id": "465", "name": "analcatdata_cyyoung8092", "exact_name": "analcatdata_cyyoung8092", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nanalcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\"\nby Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission\nconsists of a zip file containing two versions of each of 84 data sets,\nplus this README file. Each data set is given in comma-delimited ASCII\n(.csv) form, and Microsoft Excel (.xls) form.\n\nNOTICE: These data sets may be used freely for scientific, educational and\/or\nnoncommercial purposes, provided suitable acknowledgment is given (by citing\nthe above-named reference).\n\nFurther details concerning the book, including information on statistical software\n(including sample S-PLUS\/R and SAS code), are available at the web site\n\nhttp:\/\/www.stern.nyu.edu\/~jsimonof\/AnalCatData\n\n\nInformation about the dataset\nCLASSTYPE: nominal\nCLASSINDEX: last\n\n\nNote: Quotes, Single-Quotes and Backslashes were removed, Blanks replaced\nwith Underscores", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": "Jeffrey S. Simonoff", "contributor": null, "date": "2014-09-28 23:51:18", "update_comment": null, "last_update": "2014-09-28 23:51:18", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/52577\/analcatdata_cyyoung8092.arff", "default_target_attribute": "Cy_Young", "row_id_attribute": null, "ignore_attribute": null, "runs": 698, "suggest": { "input": [ "analcatdata_cyyoung8092", "analcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\" by Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission consists of a zip file containing two versions of each of 84 data sets, plus this README file. Each data set is given in comma-delimited ASCII (.csv) form, and Microsoft Excel (.xls) form. NOTICE: These data sets may be used freely for scientific, educational and\/or noncommercial purposes, provided suitable acknowledgment is given (by citing " ], "weight": 5 }, "qualities": { "NumberOfInstances": 97, "NumberOfFeatures": 11, "NumberOfClasses": 2, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 7, "NumberOfSymbolicFeatures": 4, "MeanSkewnessOfNumericAtts": -0.020321797177008594, "Quartile1StdDevOfNumericAtts": 0.691644117340375, "REPTreeDepth2AUC": 0.514269406392694, "CfsSubsetEval_kNN1NErrRate": 0.17525773195876287, "kNN1NAUC": 0.5225456621004566, "J48.001.Kappa": 0.4044781509570241, "MeanStdDevOfNumericAtts": 25.509447604043498, "Quartile2AttributeEntropy": 0.9999233329473267, "REPTreeDepth2ErrRate": 0.26804123711340205, "CfsSubsetEval_kNN1NKappa": 0.4044781509570241, "kNN1NErrRate": 0.31958762886597936, "MajorityClassPercentage": 75.25773195876289, "MinAttributeEntropy": 0.9999233329473267, "Quartile2KurtosisOfNumericAtts": -1.0638322439395984, "REPTreeDepth2Kappa": -0.039571310799670266, "ClassEntropy": 0.8071675554288905, "kNN1NKappa": 0.018282729350310254, "MajorityClassSize": 73, "MinKurtosisOfNumericAtts": -1.804438462331662, "Quartile2MeansOfNumericAtts": 19, "REPTreeDepth3AUC": 0.514269406392694, "DecisionStumpAUC": 0.6695205479452054, "MaxAttributeEntropy": 5.7068284042171245, "MinMeansOfNumericAtts": 0.5965567010309278, "Quartile2MutualInformation": 0.10960074697762, "REPTreeDepth3ErrRate": 0.26804123711340205, "DecisionStumpErrRate": 0.20618556701030927, "MaxKurtosisOfNumericAtts": 0.910163421455668, "MinMutualInformation": 2.520698808e-5, "Quartile2SkewnessOfNumericAtts": -0.026634937479918942, "REPTreeDepth3Kappa": -0.039571310799670266, "DecisionStumpKappa": 0.33470507544581596, "MaxMeansOfNumericAtts": 1986.4639175257732, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 27.27272727272727, "Quartile2StdDevOfNumericAtts": 7.972851809696542, "RandomTreeDepth1AUC": 0.6215753424657534, "Dimensionality": 0.1134020618556701, "MaxMutualInformation": 0.65000144720377, "MinSkewnessOfNumericAtts": -0.9491608034661932, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": 5.7068284042171245, "RandomTreeDepth1ErrRate": 0.28865979381443296, "EquivalentNumberOfAtts": 3.1877505505445076, "MaxNominalAttDistinctValues": 62, "MinStdDevOfNumericAtts": 0.16469257782002025, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 0.3324272494196925, "AutoCorrelation": 0.53125, "RandomTreeDepth1Kappa": 0.06858710562414264, "J48.00001.AUC": 0.6232876712328768, "MaxSkewnessOfNumericAtts": 0.7808779753981469, "MinorityClassPercentage": 24.742268041237114, "PercentageOfNumericFeatures": 63.63636363636363, "Quartile3MeansOfNumericAtts": 175.60824742268042, "CfsSubsetEval_DecisionStumpAUC": 0.6232876712328768, "RandomTreeDepth2AUC": 0.6215753424657534, "J48.00001.ErrRate": 0.17525773195876287, "MaxStdDevOfNumericAtts": 83.10066143702757, "MinorityClassSize": 24, "PercentageOfSymbolicFeatures": 36.36363636363637, "Quartile3MutualInformation": 0.65000144720377, "CfsSubsetEval_DecisionStumpErrRate": 0.17525773195876287, "RandomTreeDepth2ErrRate": 0.28865979381443296, "J48.00001.Kappa": 0.4044781509570241, "MeanAttributeEntropy": 2.568891690037259, "NaiveBayesAUC": 0.8264840182648402, "Quartile1AttributeEntropy": 0.9999233329473267, "Quartile3SkewnessOfNumericAtts": 0.22340613413203642, "CfsSubsetEval_DecisionStumpKappa": 0.4044781509570241, "RandomTreeDepth2Kappa": 0.06858710562414264, "J48.0001.AUC": 0.6232876712328768, "MeanKurtosisOfNumericAtts": -0.7200366661572135, "NaiveBayesErrRate": 0.30927835051546393, "Quartile1KurtosisOfNumericAtts": -1.7058223788950893, "Quartile3StdDevOfNumericAtts": 64.13536412363679, "CfsSubsetEval_NaiveBayesAUC": 0.6232876712328768, "RandomTreeDepth3AUC": 0.6215753424657534, "J48.0001.ErrRate": 0.17525773195876287, "MeanMeansOfNumericAtts": 332.02545802650957, "MeanMutualInformation": 0.2532091337231567, "NaiveBayesKappa": 0.3649061545176779, "Quartile1MeansOfNumericAtts": 2.756907216494845, "REPTreeDepth1AUC": 0.514269406392694, "CfsSubsetEval_NaiveBayesErrRate": 0.17525773195876287, "RandomTreeDepth3ErrRate": 0.28865979381443296, "J48.0001.Kappa": 0.4044781509570241, "MeanNoiseToSignalRatio": 9.145335803114937, "NumberOfBinaryFeatures": 3, "Quartile1MutualInformation": 2.520698808e-5, "REPTreeDepth1ErrRate": 0.26804123711340205, "CfsSubsetEval_NaiveBayesKappa": 0.4044781509570241, "RandomTreeDepth3Kappa": 0.06858710562414264, "J48.001.AUC": 0.6232876712328768, "MeanNominalAttDistinctValues": 17, "Quartile1SkewnessOfNumericAtts": -0.14279114242073762, "REPTreeDepth1Kappa": -0.039571310799670266, "CfsSubsetEval_kNN1NAUC": 0.6232876712328768, "StdvNominalAttDistinctValues": 30, "J48.001.ErrRate": 0.17525773195876287 }, "tags": [ { "tag": "mythbusting_1", "uploader": "1" }, { "tag": "study_1", "uploader": "2" }, { "tag": "study_15", "uploader": "939" }, { "tag": "study_20", "uploader": "939" }, { "tag": "study_41", "uploader": "1" } ], "topics": [ { "topic": "Book-based", "uploader": "8111" } ], "features": [ { "name": "Cy_Young", "index": "10", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "0", "1" ], [ [ "73", "0" ], [ "0", "24" ] ] ] }, { "name": "Year", "index": "0", "type": "numeric", "distinct": "12", "missing": "0", "min": "1980", "max": "1992", "mean": "1986", "stdev": "4" }, { "name": "Pitcher", "index": "1", "type": "nominal", "distinct": "62", "missing": "0", "distr": [ [ "A._Holland", "B._Blyleven", "B._Harvey", "B._James", "B._Saberhagen", "B._Stanley", "B._Sutter", "B._Thigpen", "B._Welch", "D._Aase", "D._Drabek", "D._Eckersley", "D._Erickson", "D._Gooden", "D._Jackson", "D._Quisenberry", "D._Righetti", "D._Stewart", "F._Valenzuela", "F._Viola", "G._Garber", "G._Gossage", "G._Maddux", "Gott", "Gullickson", "J._Andujar", "J._Denny", "J._Franco", "J._Morris", "J._Niekro", "J._Reardon", "J._Smiley", "J._Tudor", "K._Brown", "L._Hoyt", "L._Smith", "M._Boddicker", "M._Davis", "M._Norris", "M._Scott", "M._Soto", "M._Williams", "O._Hershiser", "P._Russell", "P._Vuckovich", "R._Aguilera", "R._Clemens", "R._Dibble", "R._Dotson", "R._Guidry", "R._Myers", "R._Sutcliffe", "S._Bedrosian", "S._Carlton", "S._Rawley", "S._Rogers", "S._Stone", "T._Glavine", "T._Henke", "T._Hume", "T._Worrell", "W._Hernandez" ], [ [ "1", "0" ], [ "1", "0" ], [ "1", "0" ], [ "1", "0" ], [ "0", "2" ], [ "1", "0" ], [ "3", "0" ], [ "2", "0" ], [ "0", "1" ], [ "1", "0" ], [ "0", "1" ], [ "3", "1" ], [ "1", "0" ], [ "0", "1" ], [ "1", "0" ], [ "5", "0" ], [ "1", "0" ], [ "4", "0" ], [ "1", "0" ], [ "1", "1" ], [ "1", "0" ], [ "2", "0" ], [ "1", "1" ], [ "1", "0" ], [ "1", "0" ], [ "1", "0" ], [ "0", "1" ], [ "2", "0" ], [ "2", "0" ], [ "1", "0" ], [ "4", "0" ], [ "1", "0" ], [ "1", "0" ], [ "1", "0" ], [ "1", "1" ], [ "6", "0" ], [ "1", "0" ], [ "0", "1" ], [ "1", "0" ], [ "1", "1" ], [ "1", "0" ], [ "1", "0" ], [ "0", "1" ], [ "1", "0" ], [ "0", "1" ], [ "1", "0" ], [ "0", "3" ], [ "1", "0" ], [ "1", "0" ], [ "1", "0" ], [ "2", "0" ], [ "1", "1" ], [ "0", "1" ], [ "0", "2" ], [ "1", "0" ], [ "1", "0" ], [ "0", "1" ], [ "1", "1" ], [ "1", "0" ], [ "1", "0" ], [ "1", "0" ], [ "0", "1" ] ] ] }, { "name": "League", "index": "2", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "AL", "NL" ], [ [ "37", "12" ], [ "36", "12" ] ] ] }, { "name": "Type", "index": "3", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "Reliever", "Starter" ], [ [ "44", "4" ], [ "29", "20" ] ] ] }, { "name": "Wins", "index": "4", "type": "numeric", "distinct": "20", "missing": "0", "min": "0", "max": "27", "mean": "13", "stdev": "8" }, { "name": "Win_pct", "index": "5", "type": "numeric", "distinct": "59", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "Saves", "index": "6", "type": "numeric", "distinct": "25", "missing": "0", "min": "1", "max": "57", "mean": "19", "stdev": "19" }, { "name": "ERA", "index": "7", "type": "numeric", "distinct": "76", "missing": "0", "min": "1", "max": "4", "mean": "3", "stdev": "1" }, { "name": "Strikeouts", "index": "8", "type": "numeric", "distinct": "74", "missing": "0", "min": "32", "max": "306", "mean": "126", "stdev": "64" }, { "name": "Innings_pitched", "index": "9", "type": "numeric", "distinct": "71", "missing": "0", "min": "58", "max": "304", "mean": "176", "stdev": "83" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }