{ "data_id": "1119", "name": "adult-census", "exact_name": "adult-census", "version": 1, "version_label": null, "description": "**Author**: Ronny Kohavi and Barry Becker \n**Source**: [MLRR](http:\/\/axon.cs.byu.edu:5000\/) \n**Please cite**: Ron Kohavi, \"Scaling Up the Accuracy of Naive-Bayes Classifiers: a Decision-Tree Hybrid\", Proceedings of the Second International Conference on Knowledge Discovery and Data Mining, 1996 \n\nDataset from the MLRR repository: http:\/\/axon.cs.byu.edu:5000\/\n\n**Note: this dataset is identical to the version stored in UCI, but only includes the training data, not the test data. See [adult (2)](http:\/\/openml.org\/d\/1590) for the complete data.**", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-10-07 00:41:59", "update_comment": "ID is a row id", "last_update": "2015-04-15 17:40:41", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/54002\/adult-census.arff", "default_target_attribute": "class", "row_id_attribute": "ID", "ignore_attribute": null, "runs": 68, "suggest": { "input": [ "adult-census", "Dataset from the MLRR repository: http:\/\/axon.cs.byu.edu:5000\/ " ], "weight": 5 }, "qualities": { "NumberOfInstances": 32561, "NumberOfFeatures": 16, "NumberOfClasses": 2, "NumberOfMissingValues": 4262, "NumberOfInstancesWithMissingValues": 2399, "NumberOfNumericFeatures": 7, "NumberOfSymbolicFeatures": 9, "REPTreeDepth3Kappa": 0.5399742129529471, "DecisionStumpKappa": 0, "MaxMeansOfNumericAtts": 189778.366512085, "MinMutualInformation": 0.00837794632851, "Quartile2SkewnessOfNumericAtts": 1.0028617319960136, "RandomTreeDepth1AUC": 0.7510664957902746, "Dimensionality": 0.000491385399711311, "MaxMutualInformation": 0.16536575798522, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 12.5, "Quartile2StdDevOfNumericAtts": 208.30032560132008, "RandomTreeDepth1ErrRate": 0.1892448020638187, "EquivalentNumberOfAtts": 11.04040867270133, "MaxNominalAttDistinctValues": 41, "MinSkewnessOfNumericAtts": -0.3116758679102307, "PercentageOfInstancesWithMissingValues": 7.367709836921471, "Quartile3AttributeEntropy": 2.737119122229027, "RandomTreeDepth1Kappa": 0.47161656153513465, "J48.00001.AUC": 0.8838125025997338, "MaxSkewnessOfNumericAtts": 11.953847687705968, "MinStdDevOfNumericAtts": 2.5727203320673406, "PercentageOfMissingValues": 0.8180799115506281, "Quartile3KurtosisOfNumericAtts": 53.98246074862713, "AutoCorrelation": 0.6332002457002457, "RandomTreeDepth2AUC": 0.7510664957902746, "J48.00001.ErrRate": 0.14471300021498112, "MaxStdDevOfNumericAtts": 105549.97769702235, "MinorityClassPercentage": 24.080955744602438, "PercentageOfNumericFeatures": 43.75, "Quartile3MeansOfNumericAtts": 48252.828260802795, "CfsSubsetEval_DecisionStumpAUC": 0.8643764118076546, "RandomTreeDepth2ErrRate": 0.1892448020638187, "J48.00001.Kappa": 0.5750214906778297, "MeanAttributeEntropy": 1.7810031117434604, "MinorityClassSize": 7841, "PercentageOfSymbolicFeatures": 56.25, "Quartile3MutualInformation": 0.1407936091163975, "CfsSubsetEval_DecisionStumpErrRate": 0.14680138816375418, "RandomTreeDepth2Kappa": 0.47161656153513465, "J48.0001.AUC": 0.8838125025997338, "MeanKurtosisOfNumericAtts": 30.794842326172216, "NaiveBayesAUC": 0.8916418044023723, "Quartile1AttributeEntropy": 0.8414122063306055, "Quartile3SkewnessOfNumericAtts": 6.434433763183222, "CfsSubsetEval_DecisionStumpKappa": 0.5541727897584204, "RandomTreeDepth3AUC": 0.7510664957902746, "J48.0001.ErrRate": 0.14471300021498112, "MeanMeansOfNumericAtts": 31838.7364945794, "NaiveBayesErrRate": 0.1676852676514849, "Quartile1KurtosisOfNumericAtts": 0.42605119117947665, "Quartile3StdDevOfNumericAtts": 31926.463487885063, "CfsSubsetEval_NaiveBayesAUC": 0.8643764118076546, "RandomTreeDepth3ErrRate": 0.1892448020638187, "J48.0001.Kappa": 0.5750214906778297, "MeanMutualInformation": 0.07213355762557624, "NaiveBayesKappa": 0.49334627025060346, "Quartile1MeansOfNumericAtts": 31.456404901569346, "REPTreeDepth1AUC": 0.8679141050597956, "CfsSubsetEval_NaiveBayesErrRate": 0.14680138816375418, "RandomTreeDepth3Kappa": 0.47161656153513465, "J48.001.AUC": 0.8838125025997338, "MeanNoiseToSignalRatio": 23.690354536346533, "NumberOfBinaryFeatures": 2, "Quartile1MutualInformation": 0.011049414032615, "REPTreeDepth1ErrRate": 0.1581339639445963, "CfsSubsetEval_NaiveBayesKappa": 0.5541727897584204, "CfsSubsetEval_kNN1NAUC": 0.8643764118076546, "StdvNominalAttDistinctValues": 12.152960316089429, "J48.001.ErrRate": 0.14471300021498112, "MeanNominalAttDistinctValues": 11.222222222222221, "Quartile1SkewnessOfNumericAtts": 0.09281293562589449, "REPTreeDepth1Kappa": 0.5399742129529471, "CfsSubsetEval_kNN1NErrRate": 0.14680138816375418, "kNN1NAUC": 0.7106974391723201, "J48.001.Kappa": 0.5750214906778297, "MeanSkewnessOfNumericAtts": 3.0783611570446676, "Quartile1StdDevOfNumericAtts": 9.903751594314942, "REPTreeDepth2AUC": 0.8679141050597956, "CfsSubsetEval_kNN1NKappa": 0.5541727897584204, "kNN1NErrRate": 0.21022081631399528, "MajorityClassPercentage": 75.91904425539757, "MeanStdDevOfNumericAtts": 18894.465097013013, "Quartile2AttributeEntropy": 1.596256941021621, "REPTreeDepth2ErrRate": 0.1581339639445963, "ClassEntropy": 0.7963839552022132, "kNN1NKappa": 0.42373182625446715, "MajorityClassSize": 24720, "MinAttributeEntropy": 0.7987406510139587, "Quartile2KurtosisOfNumericAtts": 4.5677488870776175, "REPTreeDepth2Kappa": 0.5399742129529471, "REPTreeDepth3AUC": 0.8679141050597956, "DecisionStumpAUC": 0.7580616770861321, "MaxAttributeEntropy": 3.43862201924089, "MinKurtosisOfNumericAtts": -0.16612745957135688, "Quartile2MeansOfNumericAtts": 63.87064279352589, "REPTreeDepth3ErrRate": 0.1581339639445963, "DecisionStumpErrRate": 0.2408095574460244, "MaxKurtosisOfNumericAtts": 154.79943785427523, "MinMeansOfNumericAtts": 10.0806793403151, "Quartile2MutualInformation": 0.063198103835495 }, "tags": [ { "uploader": "38960", "tag": "Chemistry" }, { "uploader": "2", "tag": "derived" }, { "uploader": "38960", "tag": "Life Science" }, { "uploader": "1", "tag": "mythbusting_1" }, { "uploader": "2", "tag": "study_1" }, { "uploader": "939", "tag": "study_15" }, { "uploader": "939", "tag": "study_20" } ], "features": [ { "name": "class", "index": "15", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "<=50K", ">50K" ], [ [ "24720", "0" ], [ "0", "7841" ] ] ] }, { "name": "ID", "index": "0", "type": "numeric", "distinct": "32561", "missing": "0", "identifier": "1", "min": "1", "max": "32561", "mean": "16281", "stdev": "9400" }, { "name": "age", "index": "1", "type": "numeric", "distinct": "73", "missing": "0", "min": "17", "max": "90", "mean": "39", "stdev": "14" }, { "name": "workclass", "index": "2", "type": "nominal", "distinct": "8", "missing": "1836", "distr": [ [ "Private", "Self-emp-not-inc", "Self-emp-inc", "Federal-gov", "Local-gov", "State-gov", "Without-pay", "Never-worked" ], [ [ "17733", "4963" ], [ "1817", "724" ], [ "494", "622" ], [ "589", "371" ], [ "1476", "617" ], [ "945", "353" ], [ "14", "0" ], [ "7", "0" ] ] ] }, { "name": "fnlwgt:", "index": "3", "type": "numeric", "distinct": "21648", "missing": "0", "min": "12285", "max": "1484705", "mean": "189778", "stdev": "105550" }, { "name": "education:", "index": "4", "type": "nominal", "distinct": "16", "missing": "0", "distr": [ [ "Bachelors", "Some-college", "11th", "HS-grad", "Prof-school", "Assoc-acdm", "Assoc-voc", "9th", "7th-8th", "12th", "Masters", "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool" ], [ [ "3134", "2221" ], [ "5904", "1387" ], [ "1115", "60" ], [ "8826", "1675" ], [ "153", "423" ], [ "802", "265" ], [ "1021", "361" ], [ "487", "27" ], [ "606", "40" ], [ "400", "33" ], [ "764", "959" ], [ "162", "6" ], [ "871", "62" ], [ "107", "306" ], [ "317", "16" ], [ "51", "0" ] ] ] }, { "name": "education-num:", "index": "5", "type": "numeric", "distinct": "16", "missing": "0", "min": "1", "max": "16", "mean": "10", "stdev": "3" }, { "name": "marital-status:", "index": "6", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "Married-civ-spouse", "Divorced", "Never-married", "Separated", "Widowed", "Married-spouse-absent", "Married-AF-spouse" ], [ [ "8284", "6692" ], [ "3980", "463" ], [ "10192", "491" ], [ "959", "66" ], [ "908", "85" ], [ "384", "34" ], [ "13", "10" ] ] ] }, { "name": "occupation:", "index": "7", "type": "nominal", "distinct": "14", "missing": "1843", "distr": [ [ "Tech-support", "Craft-repair", "Other-service", "Sales", "Exec-managerial", "Prof-specialty", "Handlers-cleaners", "Machine-op-inspct", "Adm-clerical", "Farming-fishing", "Transport-moving", "Priv-house-serv", "Protective-serv", "Armed-Forces" ], [ [ "645", "283" ], [ "3170", "929" ], [ "3158", "137" ], [ "2667", "983" ], [ "2098", "1968" ], [ "2281", "1859" ], [ "1284", "86" ], [ "1752", "250" ], [ "3263", "507" ], [ "879", "115" ], [ "1277", "320" ], [ "148", "1" ], [ "438", "211" ], [ "8", "1" ] ] ] }, { "name": "relationship:", "index": "8", "type": "nominal", "distinct": "6", "missing": "0", "distr": [ [ "Wife", "Own-child", "Husband", "Not-in-family", "Other-relative", "Unmarried" ], [ [ "823", "745" ], [ "5001", "67" ], [ "7275", "5918" ], [ "7449", "856" ], [ "944", "37" ], [ "3228", "218" ] ] ] }, { "name": "race:", "index": "9", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "White", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other", "Black" ], [ [ "20699", "7117" ], [ "763", "276" ], [ "275", "36" ], [ "246", "25" ], [ "2737", "387" ] ] ] }, { "name": "sex:", "index": "10", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "Female", "Male" ], [ [ "9592", "1179" ], [ "15128", "6662" ] ] ] }, { "name": "capital-gain:", "index": "11", "type": "numeric", "distinct": "119", "missing": "0", "min": "0", "max": "99999", "mean": "1078", "stdev": "7385" }, { "name": "capital-loss:", "index": "12", "type": "numeric", "distinct": "92", "missing": "0", "min": "0", "max": "4356", "mean": "87", "stdev": "403" }, { "name": "hours-per-week:", "index": "13", "type": "numeric", "distinct": "94", "missing": "0", "min": "1", "max": "99", "mean": "40", "stdev": "12" }, { "name": "native-country:", "index": "14", "type": "nominal", "distinct": "41", "missing": "583", "distr": [ [ "United-States", "Cambodia", "England", "Puerto-Rico", "Canada", "Germany", "Outlying-US(Guam-USVI-etc)", "India", "Japan", "Greece", "South", "China", "Cuba", "Iran", "Honduras", "Philippines", "Italy", "Poland", "Jamaica", "Vietnam", "Mexico", "Portugal", "Ireland", "France", "Dominican-Republic", "Laos", "Ecuador", "Taiwan", "Haiti", "Columbia", "Hungary", "Guatemala", "Nicaragua", "Scotland", "Thailand", "Yugoslavia", "El-Salvador", "Trinadad&Tobago", "Peru", "Hong", "Holand-Netherlands" ], [ [ "21999", "7171" ], [ "12", "7" ], [ "60", "30" ], [ "102", "12" ], [ "82", "39" ], [ "93", "44" ], [ "14", "0" ], [ "60", "40" ], [ "38", "24" ], [ "21", "8" ], [ "64", "16" ], [ "55", "20" ], [ "70", "25" ], [ "25", "18" ], [ "12", "1" ], [ "137", "61" ], [ "48", "25" ], [ "48", "12" ], [ "71", "10" ], [ "62", "5" ], [ "610", "33" ], [ "33", "4" ], [ "19", "5" ], [ "17", "12" ], [ "68", "2" ], [ "16", "2" ], [ "24", "4" ], [ "31", "20" ], [ "40", "4" ], [ "57", "2" ], [ "10", "3" ], [ "61", "3" ], [ "32", "2" ], [ "9", "3" ], [ "15", "3" ], [ "10", "6" ], [ "97", "9" ], [ "17", "2" ], [ "29", "2" ], [ "14", "6" ], [ "1", "0" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }