{ "data_id": "44984", "name": "cps88wages", "exact_name": "cps88wages", "version": 2, "version_label": null, "description": "**Data Description**\n\nThis study uses data on males from the 1988 March CPS to sample the data. The March CPS contains information on previous year's wages, schooling, industry, and occupation. We select a sample of men ages 18 to 70 with positive annual income greater than 50 Dollars in 1992, who are not self-employed nor working without pay. The wage data is deflated by the deflator of Personal Consumption Expenditure for 1992. The data contains 28,155 observations and has variables characterizing the individuals.\n\nThe goal is to estimate the wage using information about working individuals.\n\n**Attribute Description**\n\n1. *wage* - target feature\n2. *education* - years of schooling\n3. *experience* - years of potential work experience\n4. *ethnicity* - race (\"cauc\", \"afam\")\n5. *smsa* - whether living in SMSA (\"no\", \"yes\")\n6. *region* - living region (\"northeast\", \"midwest\", \"south\", \"west\")\n7. *parttime* - whether working parttime (\"no\", \"yes\")", "format": "arff", "uploader": "Sebastian Fischer", "uploader_id": 30127, "visibility": "public", "creator": null, "contributor": null, "date": "2022-12-22 16:16:06", "update_comment": null, "last_update": "2022-12-22 16:16:06", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22111848\/file22f161d4b5556.arff", "default_target_attribute": "wage", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "cps88wages", "This study uses data on males from the 1988 March CPS to sample the data. The March CPS contains information on previous year's wages, schooling, industry, and occupation. We select a sample of men ages 18 to 70 with positive annual income greater than 50 Dollars in 1992, who are not self-employed nor working without pay. The wage data is deflated by the deflator of Personal Consumption Expenditure for 1992. The data contains 28,155 observations and has variables characterizing the individuals. " ], "weight": 5 }, "qualities": { "NumberOfInstances": 28155, "NumberOfFeatures": 7, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 3, "NumberOfSymbolicFeatures": 4, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 3, "PercentageOfBinaryFeatures": 42.857142857142854, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": -389.4422746323893, "PercentageOfMissingValues": 0, "Dimensionality": 0.00024862369028591725, "PercentageOfNumericFeatures": 42.857142857142854, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 57.14285714285714 }, "tags": [ { "tag": "Life Science", "uploader": "38960" }, { "tag": "Machine Learning", "uploader": "38960" }, { "tag": "study_353", "uploader": "0" } ], "features": [ { "name": "wage", "index": "0", "type": "numeric", "distinct": "5970", "missing": "0", "target": "1", "min": "50", "max": "18777", "mean": "604", "stdev": "454" }, { "name": "education", "index": "1", "type": "numeric", "distinct": "19", "missing": "0", "min": "0", "max": "18", "mean": "13", "stdev": "3" }, { "name": "experience", "index": "2", "type": "numeric", "distinct": "67", "missing": "0", "min": "-4", "max": "63", "mean": "18", "stdev": "13" }, { "name": "ethnicity", "index": "3", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "smsa", "index": "4", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "region", "index": "5", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "parttime", "index": "6", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, 
"total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }