{ "data_id": "179", "name": "adult", "exact_name": "adult", "version": 1, "version_label": "1", "description": "**Author**: Ronny Kohavi and Barry Becker \n**Source**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Adult) - 1996-05-01 \n**Please cite**: Ron Kohavi, \"Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid\", Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, 1996 \n\n**Note: This dataset is not the original UCI dataset. It has some discretized features. See version 2 for the original.**\n\nPrediction task is to determine whether a person makes over 50K a year. Extraction was done by Barry Becker from the 1994 Census database. A set of reasonably clean records was extracted using the following conditions: ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))\n\nRonny Kohavi and Barry Becker. Data Mining and Visualization, Silicon Graphics. \ne-mail: ronnyk '@' live.com for questions. \n\n", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": null, "contributor": null, "date": "2014-04-23 13:13:24", "update_comment": null, "last_update": "2014-04-23 13:13:24", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/3608\/dataset_183_adult.arff", "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 2671, "suggest": { "input": [ "adult", "Prediction task is to determine whether a person makes over 50K a year. Extraction was done by Barry Becker from the 1994 Census database. A set of reasonably clean records was extracted using the following conditions: ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0)) Ronny Kohavi and Barry Becker. Data Mining and Visualization, Silicon Graphics. e-mail: ronnyk '@' live.com for questions. " ], "weight": 5 }, "qualities": { "NumberOfInstances": 48842, "NumberOfFeatures": 15, "NumberOfClasses": 2, "NumberOfMissingValues": 6465, "NumberOfInstancesWithMissingValues": 3620, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 13, "Quartile2SkewnessOfNumericAtts": 0.5611835109304475, "REPTreeDepth3Kappa": 0.5382480723298376, "DecisionStumpKappa": 0, "MaxMeansOfNumericAtts": 189664.13459727284, "MinMutualInformation": 0.00818704228545, "PercentageOfBinaryFeatures": 13.333333333333334, "Quartile2StdDevOfNumericAtts": 52803.29819795652, "RandomTreeDepth1AUC": 0.7414420650537187, "Dimensionality": 0.00030711273084640267, "MaxMutualInformation": 0.16542318099233, "MinNominalAttDistinctValues": 2, "PercentageOfInstancesWithMissingValues": 7.411653904426519, "Quartile3AttributeEntropy": 2.2474183457443537, "RandomTreeDepth1ErrRate": 0.20070840669915238, "EquivalentNumberOfAtts": 11.813422825673618, "MaxNominalAttDistinctValues": 41, "MinSkewnessOfNumericAtts": -0.3165248566609311, "PercentageOfMissingValues": 0.8824372466319971, "Quartile3KurtosisOfNumericAtts": 6.05784821246452, "AutoCorrelation": 0.634425994553756, "RandomTreeDepth1Kappa": 0.43929310547911604, "J48.00001.AUC": 0.8696320344865459, "MaxSkewnessOfNumericAtts": 1.438891878521826, "MinStdDevOfNumericAtts": 2.5709727555918316, "PercentageOfNumericFeatures": 13.333333333333334, "Quartile3MeansOfNumericAtts": 189664.13459727284, "CfsSubsetEval_DecisionStumpAUC": 0.8617329193273937, "RandomTreeDepth2AUC": 0.7414420650537187, "J48.00001.ErrRate": 0.1499529093812702, "MaxStdDevOfNumericAtts": 105604.02542315745, "MinorityClassPercentage": 23.928176569346054, "PercentageOfSymbolicFeatures": 86.66666666666667, "Quartile3MutualInformation": 0.09106812244262, "CfsSubsetEval_DecisionStumpErrRate": 0.15744646001392246, "RandomTreeDepth2ErrRate": 0.20070840669915238, "J48.00001.Kappa": 0.5569155546878475, "MeanAttributeEntropy": 1.6017062538629998, "MinorityClassSize": 11687, "Quartile1AttributeEntropy": 0.7981454034288663, "Quartile3SkewnessOfNumericAtts": 1.438891878521826, "CfsSubsetEval_DecisionStumpKappa": 0.5309858197211614, "RandomTreeDepth2Kappa": 0.43929310547911604, "J48.0001.AUC": 0.8696320344865459, "MeanKurtosisOfNumericAtts": 3.341796742625668, "NaiveBayesAUC": 0.9016854965859895, "Quartile1KurtosisOfNumericAtts": 0.6257452727868169, "Quartile3StdDevOfNumericAtts": 105604.02542315745, "CfsSubsetEval_NaiveBayesAUC": 0.8617329193273937, "RandomTreeDepth3AUC": 0.7414420650537187, "J48.0001.ErrRate": 0.1499529093812702, "MeanMeansOfNumericAtts": 94837.1063429016, "NaiveBayesErrRate": 0.1755661111338602, "Quartile1MeansOfNumericAtts": 10.078088530363212, "REPTreeDepth1AUC": 0.8736037697727678, "CfsSubsetEval_NaiveBayesErrRate": 0.15744646001392246, "RandomTreeDepth3ErrRate": 0.20070840669915238, "J48.0001.Kappa": 0.5569155546878475, "MeanMutualInformation": 0.06719846153641416, "NaiveBayesKappa": 0.5596958713808287, "Quartile1MutualInformation": 0.016285621608094998, "REPTreeDepth1ErrRate": 0.1575078825600917, "CfsSubsetEval_NaiveBayesKappa": 0.5309858197211614, "RandomTreeDepth3Kappa": 0.43929310547911604, "J48.001.AUC": 0.8696320344865459, "MeanNoiseToSignalRatio": 22.835460176347215, "NumberOfBinaryFeatures": 2, "Quartile1SkewnessOfNumericAtts": -0.3165248566609311, "REPTreeDepth1Kappa": 0.5382480723298376, "CfsSubsetEval_kNN1NAUC": 0.8617329193273937, "StdvNominalAttDistinctValues": 10.363273416128512, "J48.001.ErrRate": 0.1499529093812702, "MeanNominalAttDistinctValues": 9.307692307692308, "Quartile1StdDevOfNumericAtts": 2.5709727555918316, "REPTreeDepth2AUC": 0.8736037697727678, "CfsSubsetEval_kNN1NErrRate": 0.15744646001392246, "kNN1NAUC": 0.7147655570520343, "J48.001.Kappa": 0.5569155546878475, "MeanSkewnessOfNumericAtts": 0.5611835109304475, "Quartile2AttributeEntropy": 1.5641796464106261, "REPTreeDepth2ErrRate": 0.1575078825600917, "CfsSubsetEval_kNN1NKappa": 0.5309858197211614, "kNN1NErrRate": 0.20513083002334057, "MajorityClassPercentage": 76.07182343065395, "MeanStdDevOfNumericAtts": 52803.29819795652, "Quartile2KurtosisOfNumericAtts": 3.3417967426256685, "REPTreeDepth2Kappa": 0.5382480723298376, "ClassEntropy": 0.7938438393644257, "kNN1NKappa": 0.4315835543870782, "MajorityClassSize": 37155, "MinAttributeEntropy": 0.3606792438308567, "Quartile2MeansOfNumericAtts": 94837.1063429016, "REPTreeDepth3AUC": 0.8736037697727678, "DecisionStumpAUC": 0.7593631363767562, "MaxAttributeEntropy": 3.44192266924963, "MinKurtosisOfNumericAtts": 0.6257452727868169, "Quartile2MutualInformation": 0.06527298792667, "REPTreeDepth3ErrRate": 0.1575078825600917, "DecisionStumpErrRate": 0.23928176569346055, "MaxKurtosisOfNumericAtts": 6.05784821246452, "MinMeansOfNumericAtts": 10.078088530363212 }, "tags": [ { "uploader": "38960", "tag": "Demographics" }, { "uploader": "2", "tag": "derived" }, { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "1", "tag": "mythbusting_1" }, { "uploader": "2", "tag": "study_1" }, { "uploader": "939", "tag": "study_15" }, { "uploader": "1", "tag": "study_16" }, { "uploader": "939", "tag": "study_20" }, { "uploader": "1", "tag": "study_37" }, { "uploader": "1", "tag": "study_41" } ], "features": [ { "name": "class", "index": "14", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ ">50K", "<=50K" ], [ [ "11687", "0" ], [ "0", "37155" ] ] ] }, { "name": "age", "index": "0", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4" ], [ [ "169", "9458" ], [ "2379", "10340" ], [ "4131", "7821" ], [ "3255", "5041" ], [ "1753", "4495" ] ] ] }, { "name": "workclass", "index": "1", "type": "nominal", "distinct": "8", "missing": "2799", "distr": [ [ "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov", "Local-gov", "State-gov", "Without-pay", "Never-worked" ], [ [ "7387", "26519" ], [ "1077", "2785" ], [ "938", "757" ], [ "561", "871" ], [ "927", "2209" ], [ "530", "1451" ], [ "2", "19" ], [ "0", "10" ] ] ] }, { "name": "fnlwgt", "index": "2", "type": "numeric", "distinct": "28523", "missing": "0", "min": "12285", "max": "1490400", "mean": "189664", "stdev": "105604" }, { "name": "education", "index": "3", "type": "nominal", "distinct": "16", "missing": "0", "distr": [ [ "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school", "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters", "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool" ], [ [ "3313", "4712" ], [ "2063", "8815" ], [ "92", "1720" ], [ "2503", "13281" ], [ "617", "217" ], [ "413", "1188" ], [ "522", "1539" ], [ "41", "715" ], [ "62", "893" ], [ "48", "609" ], [ "1459", "1198" ], [ "8", "239" ], [ "87", "1302" ], [ "431", "163" ], [ "27", "482" ], [ "1", "82" ] ] ] }, { "name": "education-num", "index": "4", "type": "numeric", "distinct": "16", "missing": "0", "min": "1", "max": "16", "mean": "10", "stdev": "3" }, { "name": "marital-status", "index": "5", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "Married-civ-spouse", "Divorced", "Never-married", "Separated", "Widowed", "Married-spouse-absent", "Married-AF-spouse" ], [ [ "9984", "12395" ], [ "671", "5962" ], [ "733", "15384" ], [ "99", "1431" ], [ "128", "1390" ], [ "58", "570" ], [ "14", "23" ] ] ] }, { "name": "occupation", "index": "6", "type": "nominal", "distinct": "14", "missing": "2809", "distr": [ [ "Tech-support", "Craft-repair", "Other-service", "Sales", "Exec-managerial", "Prof-specialty", "Handlers-cleaners", "Machine-op-inspct", "Adm-clerical", "Farming-fishing", "Transport-moving", "Priv-house-serv", "Protective-serv", "Armed-Forces" ], [ [ "420", "1026" ], [ "1383", "4729" ], [ "204", "4719" ], [ "1475", "4029" ], [ "2908", "3178" ], [ "2784", "3388" ], [ "138", "1934" ], [ "372", "2650" ], [ "768", "4843" ], [ "173", "1317" ], [ "481", "1874" ], [ "3", "239" ], [ "308", "675" ], [ "5", "10" ] ] ] }, { "name": "relationship", "index": "7", "type": "nominal", "distinct": "6", "missing": "0", "distr": [ [ "Wife", "Own-child", "Husband", "Not-in-family", "Other-relative", "Unmarried" ], [ [ "1093", "1238" ], [ "111", "7470" ], [ "8846", "10870" ], [ "1276", "11307" ], [ "52", "1454" ], [ "309", "4816" ] ] ] }, { "name": "race", "index": "8", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "White", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other", "Black" ], [ [ "10607", "31155" ], [ "409", "1110" ], [ "55", "415" ], [ "50", "356" ], [ "566", "4119" ] ] ] }, { "name": "sex", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "Female", "Male" ], [ [ "1769", "14423" ], [ "9918", "22732" ] ] ] }, { "name": "capitalgain", "index": "10", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4" ], [ [ "9196", "35611" ], [ "142", "946" ], [ "702", "577" ], [ "531", "3" ], [ "1116", "18" ] ] ] }, { "name": "capitalloss", "index": "11", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4" ], [ [ "10543", "36017" ], [ "90", "297" ], [ "308", "471" ], [ "550", "245" ], [ "196", "125" ] ] ] }, { "name": "hoursperweek", "index": "12", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4" ], [ [ "301", "4152" ], [ "563", "5316" ], [ "6371", "21438" ], [ "3853", "5172" ], [ "599", "1077" ] ] ] }, { "name": "native-country", "index": "13", "type": "nominal", "distinct": "41", "missing": "857", "distr": [ [ "United-States", "Cambodia", "England", "Puerto-Rico", "Canada", "Germany", "Outlying-US(Guam-USVI-etc)", "India", "Japan", "Greece", "South", "China", "Cuba", "Iran", "Honduras", "Philippines", "Italy", "Poland", "Jamaica", "Vietnam", "Mexico", "Portugal", "Ireland", "France", "Dominican-Republic", "Laos", "Ecuador", "Taiwan", "Haiti", "Columbia", "Hungary", "Guatemala", "Nicaragua", "Scotland", "Thailand", "Yugoslavia", "El-Salvador", "Trinadad&Tobago", "Peru", "Hong", "Holand-Netherlands" ], [ [ "10694", "33138" ], [ "9", "19" ], [ "47", "80" ], [ "20", "164" ], [ "63", "119" ], [ "58", "148" ], [ "1", "22" ], [ "62", "89" ], [ "32", "60" ], [ "18", "31" ], [ "20", "95" ], [ "36", "86" ], [ "34", "104" ], [ "22", "37" ], [ "2", "18" ], [ "85", "210" ], [ "34", "71" ], [ "17", "70" ], [ "15", "91" ], [ "7", "79" ], [ "47", "904" ], [ "12", "55" ], [ "11", "26" ], [ "16", "22" ], [ "5", "98" ], [ "2", "21" ], [ "6", "39" ], [ "26", "39" ], [ "9", "66" ], [ "4", "81" ], [ "6", "13" ], [ "3", "85" ], [ "3", "46" ], [ "3", "18" ], [ "5", "25" ], [ "8", "15" ], [ "11", "144" ], [ "2", "25" ], [ "4", "42" ], [ "8", "22" ], [ "0", "1" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }