{ "data_id": "218", "name": "house_8L", "exact_name": "house_8L", "version": 1, "version_label": "1", "description": "**Author**: \n**Source**: Unknown - \n**Please cite**: \n\nThis database was designed on the basis of data provided by US Census\n Bureau [http:\/\/www.census.gov] (under Lookup Access\n [http:\/\/www.census.gov\/cdrom\/lookup]: Summary Tape File 1). The data\n were collected as part of the 1990 US census. These are mostly counts\n cumulated at different survey levels. For the purpose of this data set\n a level State-Place was used. Data from all states was obtained. Most\n of the counts were changed into appropriate proportions. There are 4\n different data sets obtained from this database: House(8H) House(8L)\n House(16H) House(16L) These are all concerned with predicting the\n median price of the house in the region based on demographic\n composition and a state of housing market in the region. A number in\n the name signifies the number of attributes of the data set. A\n following letter denotes a very rough approximation to the difficulty\n of the task. For Low task difficulty, more correlated attributes were\n chosen as signified by univariate smooth fit of that input on the\n target. Tasks with High difficulty have had their attributes chosen to\n make the modelling more difficult due to higher variance or lower\n correlation of the inputs to the target.\n \n Original source: DELVE repository of data. \n Source: collection of regression datasets by Luis Torgo (ltorgo@ncc.up.pt) at\n http:\/\/www.ncc.up.pt\/~ltorgo\/Regression\/DataSets.html\n Characteristics: 22784 cases, 9 continuous attributes.", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": null, "contributor": null, "date": "2014-04-23 13:18:38", "update_comment": null, "last_update": "2014-04-23 13:18:38", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/3655\/dataset_2204_house_8L.arff", "default_target_attribute": "price", "row_id_attribute": null, "ignore_attribute": null, "runs": 2, "suggest": { "input": [ "house_8L", "This database was designed on the basis of data provided by US Census Bureau [http:\/\/www.census.gov] (under Lookup Access [http:\/\/www.census.gov\/cdrom\/lookup]: Summary Tape File 1). The data were collected as part of the 1990 US census. These are mostly counts cumulated at different survey levels. For the purpose of this data set a level State-Place was used. Data from all states was obtained. Most of the counts were changed into appropriate proportions. There are 4 different data sets obtained " ], "weight": 5 }, "qualities": { "NumberOfInstances": 22784, "NumberOfFeatures": 9, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 9, "NumberOfSymbolicFeatures": 0, "Quartile2SkewnessOfNumericAtts": 1.550698076382339, "REPTreeDepth3Kappa": null, "DecisionStumpKappa": null, "MaxMeansOfNumericAtts": 50074.439782303394, "MinMutualInformation": null, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 0.13981136678950676, "RandomTreeDepth1AUC": null, "Dimensionality": 0.0003950140449438202, "MaxMutualInformation": null, "MinNominalAttDistinctValues": null, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": null, "EquivalentNumberOfAtts": null, "MaxNominalAttDistinctValues": null, "MinSkewnessOfNumericAtts": -0.2709088573209376, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 117.70346368881404, "AutoCorrelation": -43992.475573892814, "RandomTreeDepth1Kappa": null, "J48.00001.AUC": null, "MaxSkewnessOfNumericAtts": 74.16685526289018, "MinStdDevOfNumericAtts": 0.04210519473919126, "PercentageOfNumericFeatures": 100, "Quartile3MeansOfNumericAtts": 1470.9218697503552, "CfsSubsetEval_DecisionStumpAUC": null, "RandomTreeDepth2AUC": null, "J48.00001.ErrRate": null, "MaxStdDevOfNumericAtts": 52843.47555124904, "MinorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": null, "RandomTreeDepth2ErrRate": null, "J48.00001.Kappa": null, "MeanAttributeEntropy": null, "MinorityClassSize": null, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 8.520328299860996, "CfsSubsetEval_DecisionStumpKappa": null, "RandomTreeDepth2Kappa": null, "J48.0001.AUC": null, "MeanKurtosisOfNumericAtts": 867.8055603214503, "NaiveBayesAUC": null, "Quartile1KurtosisOfNumericAtts": 2.3816616905470203, "Quartile3StdDevOfNumericAtts": 12475.319408671363, "CfsSubsetEval_NaiveBayesAUC": null, "RandomTreeDepth3AUC": null, "J48.0001.ErrRate": null, "MeanMeansOfNumericAtts": 5890.912731552114, "NaiveBayesErrRate": null, "Quartile1MeansOfNumericAtts": 0.11447744480117628, "REPTreeDepth1AUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "RandomTreeDepth3ErrRate": null, "J48.0001.Kappa": null, "MeanMutualInformation": null, "NaiveBayesKappa": null, "Quartile1MutualInformation": null, "REPTreeDepth1ErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "RandomTreeDepth3Kappa": null, "J48.001.AUC": null, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 0, "Quartile1SkewnessOfNumericAtts": -0.20161965359070916, "REPTreeDepth1Kappa": null, "CfsSubsetEval_kNN1NAUC": null, "StdvNominalAttDistinctValues": null, "J48.001.ErrRate": null, "MeanNominalAttDistinctValues": null, "Quartile1StdDevOfNumericAtts": 0.0737993531073722, "REPTreeDepth2AUC": null, "CfsSubsetEval_kNN1NErrRate": null, "kNN1NAUC": null, "J48.001.Kappa": null, "MeanSkewnessOfNumericAtts": 10.62923331438247, "Quartile2AttributeEntropy": null, "REPTreeDepth2ErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "kNN1NErrRate": null, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 8643.879391781787, "Quartile2KurtosisOfNumericAtts": 5.913719502062106, "REPTreeDepth2Kappa": null, "ClassEntropy": null, "kNN1NKappa": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2MeansOfNumericAtts": 0.49162564662482483, "REPTreeDepth3AUC": null, "DecisionStumpAUC": null, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -1.0959003161475254, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": null, "DecisionStumpErrRate": null, "MaxKurtosisOfNumericAtts": 7546.166943835947, "MinMeansOfNumericAtts": 0.010329851518609533 }, "tags": [ { "uploader": "38960", "tag": "Demographics" }, { "uploader": "38960", "tag": "Economics" } ], "features": [ { "name": "price", "index": "8", "type": "numeric", "distinct": "2045", "missing": "0", "target": "1", "min": "0", "max": "500001", "mean": "50074", "stdev": "52843" }, { "name": "P3", "index": "0", "type": "numeric", "distinct": "5818", "missing": "0", "min": "1", "max": "2819401", "mean": "2936", "stdev": "24950" }, { "name": "P6p4", "index": "1", "type": "numeric", "distinct": "12051", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P11p3", "index": "2", "type": "numeric", "distinct": "18765", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "P16p2", "index": "3", "type": "numeric", "distinct": "15570", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "P19p2", "index": "4", "type": "numeric", "distinct": "10941", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H5p2", "index": "5", "type": "numeric", "distinct": "6002", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "H15p1", "index": "6", "type": "numeric", "distinct": "18585", "missing": "0", "min": "0", "max": "10", "mean": "6", "stdev": "1" }, { "name": "H40p4", "index": "7", "type": "numeric", "distinct": "2421", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }