{ "data_id": "1037", "name": "ada_prior", "exact_name": "ada_prior", "version": 1, "version_label": null, "description": "**Author**: \r\n**Source**: Unknown - Date unknown \r\n**Please cite**: \r\n\r\nDatasets from the Agnostic Learning vs. Prior Knowledge Challenge (http:\/\/www.agnostic.inf.ethz.ch)\r\n\r\nDataset from: http:\/\/www.agnostic.inf.ethz.ch\/datasets.php\r\n\r\nModified by TunedIT (converted to ARFF format)\r\n\r\n\r\nADA is the marketing database\r\n\r\nThe task of ADA is to discover high revenue people from census data. This is a two-class classification problem. The raw data from the census bureau is known as the Adult database in the UCI machine-learning repository. The 14 original attributes (features) include age, workclass, education,\r\nmarital status, occupation, native country, etc. It contains continuous, binary and categorical features. This dataset is from \"prior knowledge track\", i.e. has access to the original features and their identity.\r\n\r\n\r\nNumber of examples:\r\nPos_ex Neg_ex Tot_ex\r\nTrain 1029 3118 4147\r\nValid 103 312 415\r\n\r\nThis dataset contains samples from both training and validation datasets.\r\n\r\n### Attribute information \r\n1. age Instance\u2019s age (numeric)\r\n2. workclass Instance\u2019s work class (nominal)\r\n3. fnlwgt Instance\u2019s sampling weight (numeric)\r\n4. education Instance\u2019s education level (nominal)\r\n5. educationNum Instance\u2019s education level (numeric version)\r\n6. maritalStatus Instance\u2019s marital status (nominal)\r\n7. occupation Instance\u2019s occupation (nominal)\r\n8. relationship Instance\u2019s type of relationship (nominal)\r\n9. race Instance\u2019s race (nominal)\r\n10. sex Instance\u2019s sex (nominal)\r\n11. capitalGain Instance\u2019s capital gain (numeric)\r\n12. capitalLoss Instance\u2019s capital loss (numeric)\r\n13. hoursPerWeek Instance\u2019s number of working hours (numeric)\r\n14. 
nativeCountry Instance\u2019s native country (nominal)\r\n15. label Class attribute (1: the instance earns more than 50K a year; -1 otherwise)", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-10-06 23:55:58", "update_comment": null, "last_update": "2014-10-06 23:55:58", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/53920\/ada_prior.arff", "default_target_attribute": "label", "row_id_attribute": null, "ignore_attribute": null, "runs": 778, "suggest": { "input": [ "ada_prior", "Datasets from the Agnostic Learning vs. Prior Knowledge Challenge (http:\/\/www.agnostic.inf.ethz.ch) Dataset from: http:\/\/www.agnostic.inf.ethz.ch\/datasets.php Modified by TunedIT (converted to ARFF format) ADA is the marketing database The task of ADA is to discover high revenue people from census data. This is a two-class classification problem. The raw data from the census bureau is known as the Adult database in the UCI machine-learning repository. 
The 14 original attributes (features) includ " ], "weight": 5 }, "qualities": { "NumberOfInstances": 4562, "NumberOfFeatures": 15, "NumberOfClasses": 2, "NumberOfMissingValues": 88, "NumberOfInstancesWithMissingValues": 88, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 9, "REPTreeDepth3Kappa": 0.47772171057022844, "DecisionStumpKappa": 0, "MaxMeansOfNumericAtts": 191077.22709338015, "MinMutualInformation": 0.00826636529637, "Quartile2SkewnessOfNumericAtts": 0.9937119761197701, "RandomTreeDepth1AUC": 0.7236419164587357, "Dimensionality": 0.003288031565103025, "MaxMutualInformation": 0.15591799834524, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 13.333333333333334, "Quartile2StdDevOfNumericAtts": 218.02938312909245, "RandomTreeDepth1ErrRate": 0.21372205173169662, "EquivalentNumberOfAtts": 11.317450768757334, "MaxNominalAttDistinctValues": 39, "MinSkewnessOfNumericAtts": -0.28797733757910504, "PercentageOfInstancesWithMissingValues": 1.9289785181937746, "Quartile3AttributeEntropy": 2.7045869245807856, "RandomTreeDepth1Kappa": 0.4256884391900772, "J48.00001.AUC": 0.8371273269529922, "MaxSkewnessOfNumericAtts": 10.753121940290164, "MinStdDevOfNumericAtts": 2.530240304710453, "PercentageOfMissingValues": 0.12859856787958499, "Quartile3KurtosisOfNumericAtts": 43.8826411495145, "AutoCorrelation": 0.6215742161806621, "RandomTreeDepth2AUC": 0.7236419164587357, "J48.00001.ErrRate": 0.1600175361683472, "MaxStdDevOfNumericAtts": 108007.33092017627, "MinorityClassPercentage": 24.813678211310826, "PercentageOfNumericFeatures": 40, "Quartile3MeansOfNumericAtts": 48677.14165935994, "CfsSubsetEval_DecisionStumpAUC": 0.8530674056598914, "RandomTreeDepth2ErrRate": 0.21372205173169662, "J48.00001.Kappa": 0.545313793638276, "MeanAttributeEntropy": 1.7900302461856028, "MinorityClassSize": 1132, "PercentageOfSymbolicFeatures": 60, "Quartile3MutualInformation": 0.13451648790865, "CfsSubsetEval_DecisionStumpErrRate": 0.15366067514248136, 
"RandomTreeDepth2Kappa": 0.4256884391900772, "J48.0001.AUC": 0.8371273269529922, "MeanKurtosisOfNumericAtts": 24.636272437238887, "NaiveBayesAUC": 0.8829097709408578, "Quartile1AttributeEntropy": 0.9018541007621855, "Quartile3SkewnessOfNumericAtts": 5.948513280890106, "CfsSubsetEval_DecisionStumpKappa": 0.5557098903932444, "RandomTreeDepth3AUC": 0.7236419164587357, "J48.0001.ErrRate": 0.1600175361683472, "MeanMeansOfNumericAtts": 32078.8478737396, "NaiveBayesErrRate": 0.1716352476983779, "Quartile1KurtosisOfNumericAtts": 0.4995135461287261, "Quartile3StdDevOfNumericAtts": 33323.74726030333, "CfsSubsetEval_NaiveBayesAUC": 0.8530674056598914, "RandomTreeDepth3ErrRate": 0.21372205173169662, "J48.0001.Kappa": 0.545313793638276, "MeanMutualInformation": 0.0714217037438, "NaiveBayesKappa": 0.49173077103676766, "Quartile1MeansOfNumericAtts": 31.46854449802718, "REPTreeDepth1AUC": 0.8382690405793817, "CfsSubsetEval_NaiveBayesErrRate": 0.15366067514248136, "RandomTreeDepth3Kappa": 0.4256884391900772, "J48.001.AUC": 0.8371273269529922, "MeanNoiseToSignalRatio": 24.06283317752683, "NumberOfBinaryFeatures": 2, "Quartile1MutualInformation": 0.017670912822822498, "REPTreeDepth1ErrRate": 0.17821131082858396, "CfsSubsetEval_NaiveBayesKappa": 0.5557098903932444, "CfsSubsetEval_kNN1NAUC": 0.8530674056598914, "StdvNominalAttDistinctValues": 11.58063517736014, "J48.001.ErrRate": 0.1600175361683472, "MeanNominalAttDistinctValues": 10.88888888888889, "Quartile1SkewnessOfNumericAtts": 0.07175642891950362, "REPTreeDepth1Kappa": 0.47772171057022844, "CfsSubsetEval_kNN1NErrRate": 0.15366067514248136, "kNN1NAUC": 0.6900861758130815, "J48.001.Kappa": 0.545313793638276, "MeanSkewnessOfNumericAtts": 2.831868883409954, "Quartile1StdDevOfNumericAtts": 9.528459373210907, "REPTreeDepth2AUC": 0.8382690405793817, "CfsSubsetEval_kNN1NKappa": 0.5557098903932444, "kNN1NErrRate": 0.22884699693117053, "MajorityClassPercentage": 75.18632178868917, "MeanStdDevOfNumericAtts": 19481.166749913486, 
"Quartile2AttributeEntropy": 1.624541422254258, "REPTreeDepth2ErrRate": 0.17821131082858396, "ClassEntropy": 0.8083116159412278, "kNN1NKappa": 0.38266361320675, "MajorityClassSize": 3430, "MinAttributeEntropy": 0.8223567833790162, "Quartile2KurtosisOfNumericAtts": 3.771142714950889, "REPTreeDepth2Kappa": 0.47772171057022844, "REPTreeDepth3AUC": 0.8382690405793817, "DecisionStumpAUC": 0.7451055434793807, "MaxAttributeEntropy": 3.4002236588905155, "MinKurtosisOfNumericAtts": -0.05077619416414736, "Quartile2MeansOfNumericAtts": 68.34294169224032, "REPTreeDepth3ErrRate": 0.17821131082858396, "DecisionStumpErrRate": 0.24813678211310827, "MaxKurtosisOfNumericAtts": 121.69949059317503, "MinMeansOfNumericAtts": 10.154537483559842, "Quartile2MutualInformation": 0.06283142584161999 }, "tags": [ { "uploader": "38960", "tag": "Chemistry" }, { "uploader": "2", "tag": "derived" }, { "uploader": "2", "tag": "invalid_ARFF" }, { "uploader": "38960", "tag": "Life Science" }, { "uploader": "1", "tag": "mythbusting_1" }, { "uploader": "5824", "tag": "study_144" }, { "uploader": "939", "tag": "study_15" }, { "uploader": "939", "tag": "study_20" }, { "uploader": "1", "tag": "study_41" }, { "uploader": "64", "tag": "study_52" } ], "features": [ { "name": "label", "index": "14", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "-1", "1" ], [ [ "3430", "0" ], [ "0", "1132" ] ] ] }, { "name": "age", "index": "0", "type": "numeric", "distinct": "70", "missing": "0", "min": "17", "max": "90", "mean": "39", "stdev": "13" }, { "name": "workclass", "index": "1", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "Federal-gov", "Without-pay", "Self-emp-inc", "State-gov", "Local-gov", "Self-emp-not-inc", "Private" ], [ [ "92", "62" ], [ "1", "0" ], [ "75", "88" ], [ "170", "53" ], [ "215", "99" ], [ "278", "116" ], [ "2599", "714" ] ] ] }, { "name": "fnlwgt", "index": "2", "type": "numeric", "distinct": "4222", "missing": "0", "min": "20308", "max": 
"1184622", "mean": "191077", "stdev": "108007" }, { "name": "education", "index": "3", "type": "nominal", "distinct": "16", "missing": "0", "distr": [ [ "11th", "Masters", "Some-college", "Assoc-voc", "5th-6th", "10th", "Preschool", "9th", "Assoc-acdm", "Doctorate", "Bachelors", "HS-grad", "12th", "Prof-school", "1st-4th", "7th-8th" ], [ [ "157", "10" ], [ "113", "144" ], [ "832", "213" ], [ "158", "53" ], [ "41", "1" ], [ "96", "9" ], [ "4", "0" ], [ "57", "3" ], [ "93", "39" ], [ "16", "47" ], [ "444", "319" ], [ "1248", "222" ], [ "54", "3" ], [ "14", "62" ], [ "25", "0" ], [ "78", "7" ] ] ] }, { "name": "educationNum", "index": "4", "type": "numeric", "distinct": "16", "missing": "0", "min": "1", "max": "16", "mean": "10", "stdev": "3" }, { "name": "maritalStatus", "index": "5", "type": "nominal", "distinct": "7", "missing": "0", "distr": [ [ "Married-civ-spouse", "Divorced", "Married-spouse-absent", "Separated", "Widowed", "Married-AF-spouse", "Never-married" ], [ [ "1241", "970" ], [ "531", "61" ], [ "38", "6" ], [ "134", "12" ], [ "124", "12" ], [ "1", "0" ], [ "1361", "71" ] ] ] }, { "name": "occupation", "index": "6", "type": "nominal", "distinct": "14", "missing": "0", "distr": [ [ "Sales", "Protective-serv", "Prof-specialty", "Adm-clerical", "Tech-support", "Priv-house-serv", "Other-service", "Handlers-cleaners", "Transport-moving", "Craft-repair", "Armed-Forces", "Machine-op-inspct", "Exec-managerial", "Farming-fishing" ], [ [ "391", "138" ], [ "67", "26" ], [ "336", "315" ], [ "451", "75" ], [ "81", "34" ], [ "32", "0" ], [ "482", "23" ], [ "199", "17" ], [ "206", "51" ], [ "461", "127" ], [ "1", "1" ], [ "255", "40" ], [ "333", "274" ], [ "135", "11" ] ] ] }, { "name": "relationship", "index": "7", "type": "nominal", "distinct": "6", "missing": "0", "distr": [ [ "Unmarried", "Not-in-family", "Other-relative", "Wife", "Husband", "Own-child" ], [ [ "458", "34" ], [ "980", "118" ], [ "144", "5" ], [ "107", "102" ], [ "1100", "863" ], [ "641", "10" ] ] ] 
}, { "name": "race", "index": "8", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "White", "Black", "Asian-Pac-Islander", "Other", "Amer-Indian-Eskimo" ], [ [ "2851", "1014" ], [ "403", "59" ], [ "109", "47" ], [ "32", "5" ], [ "35", "7" ] ] ] }, { "name": "sex", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "Male", "Female" ], [ [ "2187", "963" ], [ "1243", "169" ] ] ] }, { "name": "capitalGain", "index": "10", "type": "numeric", "distinct": "72", "missing": "0", "min": "0", "max": "99999", "mean": "1210", "stdev": "8429" }, { "name": "capitalLoss", "index": "11", "type": "numeric", "distinct": "57", "missing": "0", "min": "0", "max": "3683", "mean": "96", "stdev": "423" }, { "name": "hoursPerWeek", "index": "12", "type": "numeric", "distinct": "77", "missing": "0", "min": "2", "max": "99", "mean": "41", "stdev": "12" }, { "name": "nativeCountry", "index": "13", "type": "nominal", "distinct": "39", "missing": "88", "distr": [ [ "Portugal", "Cuba", "Philippines", "Iran", "Taiwan", "Greece", "Ecuador", "Yugoslavia", "Columbia", "United-States", "Ireland", "England", "Nicaragua", "South", "Italy", "India", "Vietnam", "France", "Haiti", "Honduras", "Peru", "China", "Trinadad&Tobago", "Puerto-Rico", "Hong", "Guatemala", "Outlying-US(Guam-USVI-etc)", "Jamaica", "Scotland", "Cambodia", "Hungary", "Mexico", "Laos", "El-Salvador", "Canada", "Poland", "Dominican-Republic", "Germany", "Japan" ], [ [ "6", "1" ], [ "5", "4" ], [ "21", "11" ], [ "2", "2" ], [ "4", "4" ], [ "4", "1" ], [ "2", "1" ], [ "2", "0" ], [ "10", "1" ], [ "3046", "1036" ], [ "2", "3" ], [ "5", "6" ], [ "4", "0" ], [ "12", "2" ], [ "8", "2" ], [ "8", "4" ], [ "6", "0" ], [ "1", "1" ], [ "2", "0" ], [ "1", "0" ], [ "4", "0" ], [ "8", "4" ], [ "5", "0" ], [ "15", "1" ], [ "2", "0" ], [ "6", "0" ], [ "2", "0" ], [ "6", "1" ], [ "5", "0" ], [ "0", "3" ], [ "2", "0" ], [ "92", "3" ], [ "0", "1" ], [ "20", "1" ], [ "4", "7" ], [ "9", "1" ], [ "7", "0" ], [ 
"20", "5" ], [ "7", "3" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }