{ "data_id": "44014", "name": "house_16H", "exact_name": "house_16H", "version": 7, "version_label": null, "description": "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"regression on numerical features\" benchmark. Original description: \n \n**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nThis database was designed on the basis of data provided by US Census\nBureau [http:\/\/www.census.gov] (under Lookup Access\n[http:\/\/www.census.gov\/cdrom\/lookup]: Summary Tape File 1). The data\nwere collected as part of the 1990 US census. These are mostly counts\ncumulated at different survey levels. For the purpose of this data set\na level State-Place was used. Data from all states was obtained. Most\nof the counts were changed into appropriate proportions. There are 4\ndifferent data sets obtained from this database: House(8H) House(8L)\nHouse(16H) House(16L) These are all concerned with predicting the\nmedian price of the house in the region based on demographic\ncomposition and a state of housing market in the region. A number in\nthe name signifies the number of attributes of the data set. A\nfollowing letter denotes a very rough approximation to the difficulty\nof the task. For Low task difficulty, more correlated attributes were\nchosen as signified by univariate smooth fit of that input on the\ntarget. Tasks with High difficulty have had their attributes chosen to\nmake the modelling more difficult due to higher variance or lower\ncorrelation of the inputs to the target.\n\nOriginal source: DELVE repository of data.\nSource: collection of regression datasets by Luis Torgo (ltorgo@ncc.up.pt) at\nhttp:\/\/www.ncc.up.pt\/~ltorgo\/Regression\/DataSets.html\nCharacteristics: 22784 cases, 17 continuous attributes.", "format": "arff", "uploader": "Leo Grin", "uploader_id": 26324, "visibility": "public", "creator": "\"Luis Torgo\"", "contributor": "\"Leo Grin\"", "date": "2022-06-16 21:13:04", "update_comment": null, "last_update": "2022-06-16 21:13:04", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/old.openml.org\/data\/download\/22103102\/dataset", "default_target_attribute": "price", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "house_16H", "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"regression on numerical features\" benchmark. Original description: This database was designed on the basis of data provided by US Census Bureau [http:\/\/www.census.gov] (under Lookup Access [http:\/\/www.census.gov\/cdrom\/lookup]: Summary Tape File 1). The data were collected as part of the 1990 US census. These are mostly counts cumulated at different su " ], "weight": 5 }, "qualities": { "NumberOfInstances": 22784, "NumberOfFeatures": 17, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 17, "NumberOfSymbolicFeatures": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": 0.1545322626299256, "PercentageOfNumericFeatures": 100, "Dimensionality": 0.0007461376404494382, "PercentageOfSymbolicFeatures": 0, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "price", "index": "16", "type": "numeric", "distinct": "2045", "missing": "0", "target": "1", "min": "0", "max": "13", "mean": "10", "stdev": "1" }, { "name": "P1", "index": "0", "type": "numeric", "distinct": "8832", "missing": "0", "min": "2", "max": "7322564", "mean": "7809", "stdev": "65872" }, { "name": "P5p1", "index": "1", "type": "numeric", "distinct": "17504", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P6p2", "index": "2", "type": "numeric", "distinct": "13683", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P11p4", "index": "3", "type": "numeric", "distinct": "19220", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P14p9", "index": "4", "type": "numeric", "distinct": "16168", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P15p1", "index": "5", "type": "numeric", "distinct": "18753", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "P15p3", "index": "6", "type": "numeric", "distinct": "9655", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P16p2", "index": "7", "type": "numeric", "distinct": "15570", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "P18p2", "index": "8", "type": "numeric", "distinct": "8070", "missing": "0", "min": "0", "max": "0", "mean": "0", "stdev": "0" }, { "name": "P27p4", "index": "9", "type": "numeric", "distinct": "12052", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H2p2", "index": "10", "type": "numeric", "distinct": "15662", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H8p2", "index": "11", "type": "numeric", "distinct": "10941", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H10p1", "index": "12", "type": "numeric", "distinct": "10855", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "H13p1", "index": "13", "type": "numeric", "distinct": "17097", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H18pA", "index": "14", "type": "numeric", "distinct": "9063", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H40p4", "index": "15", "type": "numeric", "distinct": "2421", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }