{ "data_id": "42750", "name": "Census-Income-KDD", "exact_name": "Census-Income-KDD", "version": 3, "version_label": null, "description": "**Author**: Terran Lane and Ronny Kohavi. Data Mining and Visualization. Silicon Graphics \n**Source**: [original](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Census-Income+(KDD)) - 2000 \n**Please cite**: Dua, D. and Graff, C. (2019). UCI Machine Learning Repository. Irvine, CA: University of California, School of Information and Computer Science. \n\nThis version has feature names based on https:\/\/www2.1010data.com\/documentationcenter\/beta\/Tutorials\/MachineLearningExamples\/CensusIncomeDataSet.html\n\nMissing data is also properly encoded in this version.\n\nThe feature 'unknown' in the dataset does not appear in the list above. This possibly refers to the feature 'instance weight' in the original UCI description.\n\nFeature Name\n\n* Age of the worker age\n\n* Class of worker class_worker\n\n* Industry code det_ind_code\n\n* Occupation code det_occ_code\n\n* Level of education education\n\n* Wage per hour wage_per_hour\n\n* Enrolled in educational institution last week hs_college\n\n* Marital status marital_stat\n\n* Major industry code major_ind_code\n\n* Major occupation code major_occ_code\n\n* Race race\n\n* Hispanic origin hisp_origin\n\n* Sex sex\n\n* Member of a labor union union_member\n\n* Reason for unemployment unemp_reason\n\n* Full- or part-time employment status full_or_part_emp\n\n* Capital gains capital_gains\n\n* Capital losses capital_losses\n\n* Dividends from stocks stock_dividends\n\n* Tax filer status tax_filer_stat\n\n* Region of previous residence region_prev_res\n\n* State of previous residence state_prev_res\n\n* Detailed household and family status det_hh_fam_stat\n\n* Detailed household summary in household det_hh_summ\n\n* Unknown Unknown\n\n* Migration code - change in MSA mig_chg_msa\n\n* Migration code - change in region mig_chg_reg\n\n* Migration code - move within region 
mig_move_reg\n\n* Live in this house one year ago mig_same\n\n* Migration - previous residence in sunbelt mig_prev_sunbelt\n\n* Number of persons that worked for employer num_emp\n\n* Family members under 18 fam_under_18\n\n* Country of birth father country_father\n\n* Country of birth mother country_mother\n\n* Country of birth country_self\n\n* Citizenship citizenship\n\n* Own business or self-employed? own_or_self\n\n* Fill included questionnaire for Veterans Admin. vet_question\n\n* Veterans benefits vet_benefits\n\n* Weeks worked in the year weeks_worked\n\n* Year of survey year\n\n* Income less than or greater than $50,000 income_50k\n\n* Number of years of education edu_year", "format": "arff", "uploader": "Marcos de Paula Bueno", "uploader_id": 11601, "visibility": "public", "creator": "Terran Lane and Ronny Kohavi. Data Mining and Visualization. Silicon Graphics", "contributor": null, "date": "2020-12-07 20:29:02", "update_comment": null, "last_update": "2020-12-07 20:29:02", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22044835\/dataset", "default_target_attribute": "income_50k", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Census-Income-KDD", "This version has feature names based on https:\/\/www2.1010data.com\/documentationcenter\/beta\/Tutorials\/MachineLearningExamples\/CensusIncomeDataSet.html Missing data is also properly encoded in this version. The feature 'unknown' in the dataset does not appear in the list above. This possibly refers to the feature 'instance weight' in the original UCI description. 
Feature Name * Age of the worker age * Class of worker class_worker * Industry code det_ind_code * Occupation code det_occ_code * Level " ], "weight": 5 }, "qualities": { "NumberOfInstances": 199523, "NumberOfFeatures": 42, "NumberOfClasses": 2, "NumberOfMissingValues": 415717, "NumberOfInstancesWithMissingValues": 104393, "NumberOfNumericFeatures": 13, "NumberOfSymbolicFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 52.321286267748576, "PercentageOfMissingValues": 4.960843516548874, "AutoCorrelation": 1, "PercentageOfNumericFeatures": 30.952380952380953, "Dimensionality": 0.00021050204738300849, "PercentageOfSymbolicFeatures": 0, "MajorityClassPercentage": 93.79419916500854, "MajorityClassSize": 187141, "MinorityClassPercentage": 6.205800834991455, "MinorityClassSize": 12382, "NumberOfBinaryFeatures": 0 }, "tags": [ { "uploader": "38960", "tag": "Chemistry" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "income_50k", "index": "41", "type": "string", "distinct": "2", "missing": "0", "target": "1" }, { "name": "age", "index": "0", "type": "numeric", "distinct": "91", "missing": "0", "min": "0", "max": "90", "mean": "34", "stdev": "22" }, { "name": "class_worker", "index": "1", "type": "string", "distinct": "9", "missing": "0" }, { "name": "det_ind_code", "index": "2", "type": "numeric", "distinct": "52", "missing": "0", "min": "0", "max": "51", "mean": "15", "stdev": "18" }, { "name": "det_occ_code", "index": "3", "type": "numeric", "distinct": "47", "missing": "0", "min": "0", "max": "46", "mean": "11", "stdev": "14" }, { "name": "education", "index": "4", "type": "string", "distinct": "17", "missing": "0" }, { "name": "wage_per_hour", "index": "5", "type": "numeric", "distinct": "1240", "missing": "0", "min": "0", "max": "9999", "mean": "55", "stdev": "275" }, { "name": "hs_college", "index": "6", "type": "string", "distinct": "3", "missing": "0" }, { "name": "marital_stat", "index": "7", 
"type": "string", "distinct": "7", "missing": "0" }, { "name": "major_ind_code", "index": "8", "type": "string", "distinct": "24", "missing": "0" }, { "name": "major_occ_code", "index": "9", "type": "string", "distinct": "15", "missing": "0" }, { "name": "race", "index": "10", "type": "string", "distinct": "5", "missing": "0" }, { "name": "hisp_origin", "index": "11", "type": "string", "distinct": "10", "missing": "0" }, { "name": "sex", "index": "12", "type": "string", "distinct": "2", "missing": "0" }, { "name": "union_member", "index": "13", "type": "string", "distinct": "3", "missing": "0" }, { "name": "unemp_reason", "index": "14", "type": "string", "distinct": "6", "missing": "0" }, { "name": "full_or_part_emp", "index": "15", "type": "string", "distinct": "8", "missing": "0" }, { "name": "capital_gains", "index": "16", "type": "numeric", "distinct": "132", "missing": "0", "min": "0", "max": "99999", "mean": "435", "stdev": "4698" }, { "name": "capital_losses", "index": "17", "type": "numeric", "distinct": "113", "missing": "0", "min": "0", "max": "4608", "mean": "37", "stdev": "272" }, { "name": "stock_dividends", "index": "18", "type": "numeric", "distinct": "1478", "missing": "0", "min": "0", "max": "99999", "mean": "198", "stdev": "1984" }, { "name": "tax_filer_stat", "index": "19", "type": "string", "distinct": "6", "missing": "0" }, { "name": "region_prev_res", "index": "20", "type": "string", "distinct": "6", "missing": "0" }, { "name": "state_prev_res", "index": "21", "type": "string", "distinct": "50", "missing": "708" }, { "name": "det_hh_fam_stat", "index": "22", "type": "string", "distinct": "38", "missing": "0" }, { "name": "det_hh_summ", "index": "23", "type": "string", "distinct": "8", "missing": "0" }, { "name": "unknown", "index": "24", "type": "numeric", "distinct": "99800", "missing": "0", "min": "38", "max": "18656", "mean": "1740", "stdev": "994" }, { "name": "mig_chg_msa", "index": "25", "type": "string", "distinct": "9", "missing": 
"99696" }, { "name": "mig_chg_reg", "index": "26", "type": "string", "distinct": "8", "missing": "99696" }, { "name": "mig_move_reg", "index": "27", "type": "string", "distinct": "9", "missing": "99696" }, { "name": "mig_same", "index": "28", "type": "string", "distinct": "3", "missing": "0" }, { "name": "mig_prev_sunbelt", "index": "29", "type": "string", "distinct": "3", "missing": "99696" }, { "name": "num_emp", "index": "30", "type": "numeric", "distinct": "7", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "2" }, { "name": "fam_under_18", "index": "31", "type": "string", "distinct": "5", "missing": "0" }, { "name": "country_father", "index": "32", "type": "string", "distinct": "42", "missing": "6713" }, { "name": "country_mother", "index": "33", "type": "string", "distinct": "42", "missing": "6119" }, { "name": "country_self", "index": "34", "type": "string", "distinct": "42", "missing": "3393" }, { "name": "citizenship", "index": "35", "type": "string", "distinct": "5", "missing": "0" }, { "name": "own_or_self", "index": "36", "type": "numeric", "distinct": "3", "missing": "0", "min": "0", "max": "2", "mean": "0", "stdev": "1" }, { "name": "vet_question", "index": "37", "type": "string", "distinct": "3", "missing": "0" }, { "name": "vet_benefits", "index": "38", "type": "numeric", "distinct": "3", "missing": "0", "min": "0", "max": "2", "mean": "2", "stdev": "1" }, { "name": "weeks_worked", "index": "39", "type": "numeric", "distinct": "53", "missing": "0", "min": "0", "max": "52", "mean": "23", "stdev": "24" }, { "name": "year", "index": "40", "type": "numeric", "distinct": "2", "missing": "0", "min": "94", "max": "95", "mean": "94", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 1, "total_downloads": 3, "reach": 1, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }