{ "data_id": "449", "name": "analcatdata_homerun", "exact_name": "analcatdata_homerun", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nanalcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\"\nby Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission\nconsists of a zip file containing two versions of each of 84 data sets,\nplus this README file. Each data set is given in comma-delimited ASCII\n(.csv) form, and Microsoft Excel (.xls) form.\n\nNOTICE: These data sets may be used freely for scientific, educational and\/or\nnoncommercial purposes, provided suitable acknowledgment is given (by citing\nthe above-named reference).\n\nFurther details concerning the book, including information on statistical software\n(including sample S-PLUS\/R and SAS code), are available at the web site\n\nhttp:\/\/www.stern.nyu.edu\/~jsimonof\/AnalCatData\n\n\nInformation about the dataset\nCLASSTYPE: nominal\nCLASSINDEX: none specific\n\n\nNote: Quotes, Single-Quotes and Backslashes were removed, Blanks replaced\nwith Underscores", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": "Jeffrey S. Simonoff", "contributor": null, "date": "2014-09-28 23:50:47", "update_comment": "set identifier feature", "last_update": "2014-10-06 16:34:09", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/52561\/analcatdata_homerun.arff", "kaggle_url": null, "default_target_attribute": "Bonds.HR", "row_id_attribute": null, "ignore_attribute": "\"Game\"", "runs": 0, "suggest": { "input": [ "analcatdata_homerun", "analcatdata A collection of data sets used in the book \"Analyzing Categorical Data,\" by Jeffrey S. Simonoff, Springer-Verlag, New York, 2003. The submission consists of a zip file containing two versions of each of 84 data sets, plus this README file. Each data set is given in comma-delimited ASCII (.csv) form, and Microsoft Excel (.xls) form. NOTICE: These data sets may be used freely for scientific, educational and\/or noncommercial purposes, provided suitable acknowledgment is given (by citing " ], "weight": 5 }, "qualities": { "NumberOfInstances": 163, "NumberOfFeatures": 27, "NumberOfClasses": 5, "NumberOfMissingValues": 9, "NumberOfInstancesWithMissingValues": 1, "NumberOfNumericFeatures": 12, "NumberOfSymbolicFeatures": 15, "J48.001.ErrRate": 0.4444444444444444, "MeanNominalAttDistinctValues": 3.466666666666667, "Quartile1SkewnessOfNumericAtts": -0.03808443270504655, "REPTreeDepth1Kappa": -0.03259879803314512, "CfsSubsetEval_kNN1NAUC": 0.4955081569664903, "StdvNominalAttDistinctValues": 1.9952324127660872, "J48.001.Kappa": -0.03385924481474897, "MeanSkewnessOfNumericAtts": 0.3627001024738282, "Quartile1StdDevOfNumericAtts": 3.1503515618209827, "REPTreeDepth2AUC": 0.479636911255074, "CfsSubsetEval_kNN1NErrRate": 0.3765432098765432, "kNN1NAUC": 0.43441027182261405, "MajorityClassPercentage": 61.963190184049076, "MeanStdDevOfNumericAtts": 16.921501965086275, "Quartile2AttributeEntropy": 0.9999728498609091, "REPTreeDepth2ErrRate": 0.43209876543209874, "CfsSubsetEval_kNN1NKappa": 0, "kNN1NErrRate": 0.5740740740740741, "kNN1NKappa": -0.12131586781780293, "MajorityClassSize": 101, "MinAttributeEntropy": 0.16622269478192106, "Quartile2KurtosisOfNumericAtts": -0.7977535714962081, "REPTreeDepth2Kappa": -0.03259879803314512, "ClassEntropy": 1.2390282933621277, "MaxAttributeEntropy": 2.7101014215918826, "MinKurtosisOfNumericAtts": -1.3227963086268422, "Quartile2MeansOfNumericAtts": 10.240437779292584, "REPTreeDepth3AUC": 0.479636911255074, "DecisionStumpAUC": 0.5429993741821698, "MaxKurtosisOfNumericAtts": 1.0755732758130443, "MinMeansOfNumericAtts": 4.617283950617283, "Quartile2MutualInformation": 0.022926451119585002, "REPTreeDepth3ErrRate": 0.43209876543209874, "DecisionStumpErrRate": 0.3765432098765432, "MaxMeansOfNumericAtts": 92.82822085889566, "MinMutualInformation": 0.00561552213062, "Quartile2SkewnessOfNumericAtts": 0.30927723619348374, "REPTreeDepth3Kappa": -0.03259879803314512, "DecisionStumpKappa": 0, "MaxMutualInformation": 0.08976467226642, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 29.629629629629626, "Quartile2StdDevOfNumericAtts": 5.962592155249089, "RandomTreeDepth1AUC": 0.5746014662812816, "Dimensionality": 0.1656441717791411, "MaxNominalAttDistinctValues": 7, "MinSkewnessOfNumericAtts": -0.05316626326280493, "PercentageOfInstancesWithMissingValues": 0.6134969325153374, "Quartile3AttributeEntropy": 1.5697674469792249, "RandomTreeDepth1ErrRate": 0.4506172839506173, "EquivalentNumberOfAtts": 33.87897705530106, "MaxSkewnessOfNumericAtts": 1.0554371885088176, "MinStdDevOfNumericAtts": 2.944254730161723, "PercentageOfMissingValues": 0.2044989775051125, "Quartile3KurtosisOfNumericAtts": 0.19012671027325556, "AutoCorrelation": 0.46296296296296297, "RandomTreeDepth1Kappa": 0.12864721485411143, "J48.00001.AUC": 0.463511774898521, "MaxStdDevOfNumericAtts": 52.647874431145844, "MinorityClassPercentage": 0.6134969325153374, "PercentageOfNumericFeatures": 44.44444444444444, "Quartile3MeansOfNumericAtts": 72.99235969097928, "CfsSubsetEval_DecisionStumpAUC": 0.4955081569664903, "RandomTreeDepth2AUC": 0.5746014662812816, "J48.00001.ErrRate": 0.4444444444444444, "MeanAttributeEntropy": 1.2248467722562069, "MinorityClassSize": 1, "PercentageOfSymbolicFeatures": 55.55555555555556, "Quartile3MutualInformation": 0.0660381393431625, "CfsSubsetEval_DecisionStumpErrRate": 0.3765432098765432, "RandomTreeDepth2ErrRate": 0.4506172839506173, "J48.00001.Kappa": -0.03385924481474897, "MeanKurtosisOfNumericAtts": -0.49788080338502716, "NaiveBayesAUC": 0.5870825046812658, "Quartile1AttributeEntropy": 0.821141702262164, "Quartile3SkewnessOfNumericAtts": 0.7289621406586595, "CfsSubsetEval_DecisionStumpKappa": 0, "RandomTreeDepth2Kappa": 0.12864721485411143, "J48.0001.AUC": 0.463511774898521, "MeanMeansOfNumericAtts": 29.4534449241334, "NaiveBayesErrRate": 0.4876543209876543, "Quartile1KurtosisOfNumericAtts": -1.208999788969232, "Quartile3StdDevOfNumericAtts": 41.63591978331559, "CfsSubsetEval_NaiveBayesAUC": 0.4955081569664903, "RandomTreeDepth3AUC": 0.5746014662812816, "J48.0001.ErrRate": 0.4444444444444444, "MeanMutualInformation": 0.03657218726939857, "NaiveBayesKappa": 0.040629685157421366, "Quartile1MeansOfNumericAtts": 4.877196470499129, "REPTreeDepth1AUC": 0.479636911255074, "CfsSubsetEval_NaiveBayesErrRate": 0.3765432098765432, "RandomTreeDepth3ErrRate": 0.4506172839506173, "J48.0001.Kappa": -0.03385924481474897, "J48.001.AUC": 0.463511774898521, "MeanNoiseToSignalRatio": 32.49120913205773, "NumberOfBinaryFeatures": 8, "Quartile1MutualInformation": 0.0141595490124875, "REPTreeDepth1ErrRate": 0.43209876543209874, "CfsSubsetEval_NaiveBayesKappa": 0, "RandomTreeDepth3Kappa": 0.12864721485411143 }, "tags": [ { "uploader": "38960", "tag": "Education" }, { "uploader": "38960", "tag": "Sports" }, { "uploader": "2", "tag": "study_1" } ], "topics": [ { "topic": "Book-based", "uploader": "8111" } ], "features": [ { "name": "Bonds.HR", "index": "26", "type": "nominal", "distinct": "4", "missing": "1", "target": "1", "distr": [ [ "0", "1", "2", "3" ], [ [ "101", "0", "0", "0" ], [ "0", "51", "0", "0" ], [ "0", "0", "8", "0" ], [ "0", "0", "0", "2" ] ] ] }, { "name": "Game", "index": "0", "type": "numeric", "distinct": "163", "missing": "0", "ignore": "1", "min": "1", "max": "163", "mean": "82", "stdev": "47" }, { "name": "StL.month", "index": "1", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "April", "August", "July", "June", "March", "May", "September" ], [ [ "14", "12", "0", "0" ], [ "19", "10", "1", "0" ], [ "22", "3", "2", "0" ], [ "18", "8", "1", "0" ], [ "0", "1", "0", "0" ], [ "15", "9", "2", "1" ], [ "14", "8", "2", "1" ] ] ] }, { "name": "StL.date", "index": "2", "type": "numeric", "distinct": "31", "missing": "0", "min": "1", "max": "31", "mean": "16", "stdev": "9" }, { "name": "StL.season.day", "index": "3", "type": "numeric", "distinct": "159", "missing": "0", "min": "1", "max": "181", "mean": "93", "stdev": "53" }, { "name": "StL.home", "index": "4", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "48", "28", "5", "0" ], [ "54", "23", "3", "2" ] ] ] }, { "name": "StL.runs", "index": "5", "type": "numeric", "distinct": "17", "missing": "0", "min": "0", "max": "16", "mean": "5", "stdev": "3" }, { "name": "StL.opp.runs", "index": "6", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "14", "mean": "5", "stdev": "3" }, { "name": "StL.win", "index": "7", "type": "nominal", "distinct": "3", "missing": "0", "distr": [ [ "-1", "0", "1" ], [ [ "0", "1", "0", "0" ], [ "55", "21", "2", "1" ], [ "47", "29", "6", "1" ] ] ] }, { "name": "McGwire.HR", "index": "8", "type": "nominal", "distinct": "4", "missing": "0", "distr": [ [ "0", "1", "2", "3" ], [ [ "65", "35", "5", "0" ], [ "33", "12", "1", "2" ], [ "4", "3", "1", "0" ], [ "0", "1", "1", "0" ] ] ] }, { "name": "McGwire.out", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "100", "45", "8", "2" ], [ "2", "6", "0", "0" ] ] ] }, { "name": "Chi.month", "index": "10", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "April", "August", "July", "June", "March", "May", "September" ], [ [ "14", "12", "0", "0" ], [ "17", "10", "1", "0" ], [ "23", "3", "2", "0" ], [ "18", "8", "1", "0" ], [ "0", "1", "0", "0" ], [ "16", "9", "2", "1" ], [ "14", "8", "2", "1" ] ] ] }, { "name": "Chi.date", "index": "11", "type": "numeric", "distinct": "31", "missing": "0", "min": "1", "max": "31", "mean": "16", "stdev": "9" }, { "name": "Chi.season.day", "index": "12", "type": "numeric", "distinct": "161", "missing": "0", "min": "1", "max": "182", "mean": "92", "stdev": "53" }, { "name": "Chi.home", "index": "13", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "49", "25", "5", "2" ], [ "53", "26", "3", "0" ] ] ] }, { "name": "Chi.runs", "index": "14", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "15", "mean": "5", "stdev": "3" }, { "name": "Chi.opp.runs", "index": "15", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "16", "mean": "5", "stdev": "3" }, { "name": "Chi.win", "index": "16", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "43", "26", "3", "1" ], [ "59", "25", "5", "1" ] ] ] }, { "name": "Sosa.HR", "index": "17", "type": "nominal", "distinct": "4", "missing": "0", "distr": [ [ "0", "1", "2", "3" ], [ [ "70", "33", "5", "1" ], [ "25", "15", "2", "1" ], [ "6", "3", "1", "0" ], [ "1", "0", "0", "0" ] ] ] }, { "name": "Sosa.out", "index": "18", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "100", "50", "7", "2" ], [ "2", "1", "1", "0" ] ] ] }, { "name": "SF.month", "index": "19", "type": "nominal", "distinct": "7", "missing": "1", "distr": [ [ "April", "August", "July", "June", "May", "October", "September" ], [ [ "13", "11", "0", "0" ], [ "17", "10", "1", "0" ], [ "23", "2", "2", "0" ], [ "17", "9", "1", "0" ], [ "16", "10", "2", "1" ], [ "3", "2", "1", "0" ], [ "12", "7", "1", "1" ] ] ] }, { "name": "SF.date", "index": "20", "type": "numeric", "distinct": "31", "missing": "1", "min": "1", "max": "31", "mean": "15", "stdev": "9" }, { "name": "SF.season.day", "index": "21", "type": "numeric", "distinct": "162", "missing": "1", "min": "1", "max": "182", "mean": "92", "stdev": "52" }, { "name": "SF.home", "index": "22", "type": "nominal", "distinct": "2", "missing": "1", "distr": [ [ "0", "1" ], [ [ "52", "24", "3", "2" ], [ "49", "27", "5", "0" ] ] ] }, { "name": "SF.runs", "index": "23", "type": "numeric", "distinct": "13", "missing": "1", "min": "0", "max": "13", "mean": "5", "stdev": "3" }, { "name": "SF.opp.runs", "index": "24", "type": "numeric", "distinct": "15", "missing": "1", "min": "0", "max": "14", "mean": "5", "stdev": "3" }, { "name": "SF.win", "index": "25", "type": "nominal", "distinct": "2", "missing": "1", "distr": [ [ "0", "1" ], [ [ "49", "20", "3", "0" ], [ "52", "31", "5", "2" ] ] ] }, { "name": "Bonds.out", "index": "27", "type": "nominal", "distinct": "2", "missing": "1", "distr": [ [ "0", "1" ], [ [ "92", "51", "8", "2" ], [ "9", "0", "0", "0" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }