{ "data_id": "42079", "name": "house_sales", "exact_name": "house_sales", "version": 1, "version_label": "0.1", "description": "**Author**: https:\/\/www.kaggle.com\/harlfoxem\/ \r\nhttps:\/\/www.kaggle.com\/harlfoxem\/ \r\n**Source**: [original](https:\/\/www.kaggle.com\/harlfoxem\/housesalesprediction) - 2016-08-25 \r\n**Please cite**: \r\n\r\nThis dataset contains house sale prices for King County, which includes Seattle. It includes homes sold between May 2014 and May 2015.\r\n\r\nIt contains 19 house features plus the price and the id columns, along with 21613 observations.\r\nIt's a great dataset for evaluating simple regression models.\r\n\r\n* Id: Unique ID for each home sold\r\n* Date: Date of the home sale\r\n* Price: Price of each home sold\r\n* Bedrooms: Number of bedrooms\r\n* Bathrooms: Number of bathrooms, where .5 accounts for a room with a toilet but no shower\r\n* Sqft_living: Square footage of the apartment's interior living space\r\n* Sqft_lot: Square footage of the land space\r\n* Floors: Number of floors\r\n* Waterfront: A dummy variable for whether the apartment was overlooking the waterfront or not\r\n* View: An index from 0 to 4 of how good the view of the property was\r\n* Condition: An index from 1 to 5 on the condition of the apartment\r\n* Grade: An index from 1 to 13, where 1-3 falls short of the building construction and design, 7 has an average level of construction and design, and 11-13 have a high quality level of construction and design\r\n* Sqft_above: The square footage of the interior housing space that is above ground level.\r\n* Sqft_basement: The square footage of the interior housing space that is below ground level.\r\n* Yr_built: The year the house was initially built\r\n* Yr_renovated: The year of the house's last renovation\r\n* Zipcode: What zipcode area the house is in\r\n* Lat: Latitude\r\n* Long: Longitude\r\n* Sqft_living15: The square footage of interior housing living space for the nearest 15 neighbors.\r\n* Sqft_lot15: The square footage of the land lots of the nearest 15 neighbors.", 
"format": "arff", "uploader": "Thomas Schmitt", "uploader_id": 3422, "visibility": "public", "creator": "\"https:\/\/www.kaggle.com\/harlfoxem\/\"", "contributor": "\"https:\/\/www.kaggle.com\/harlfoxem\/\"", "date": "2019-08-14 11:28:03", "update_comment": null, "last_update": "2019-08-14 11:28:03", "licence": "CC0 Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/21553045\/dataset", "default_target_attribute": null, "row_id_attribute": "id", "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "house_sales", "https:\/\/www.kaggle.com\/harlfoxem\/ This dataset contains house sale prices for King County, which includes Seattle. It includes homes sold between May 2014 and May 2015. It contains 19 house features plus the price and the id columns, along with 21613 observations. It's a great dataset for evaluating simple regression models. * Id: Unique ID for each home sold * Date: Date of the home sale * Price: Price of each home sold * Bedrooms: Number of bedrooms * Bathrooms: Number of bathrooms, where .5 a " ], "weight": 5 }, "qualities": { "NumberOfInstances": 21613, "NumberOfFeatures": 20, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 19, "NumberOfSymbolicFeatures": 0, "ClassEntropy": null, "MeanNoiseToSignalRatio": null, "Quartile2AttributeEntropy": null, "Dimensionality": 0.0009253689908851155, "MeanNominalAttDistinctValues": null, "Quartile2KurtosisOfNumericAtts": 2.7155742114145998, "EquivalentNumberOfAtts": null, "MeanSkewnessOfNumericAtts": 2.9877199194316457, "Quartile2MeansOfNumericAtts": 84.4022579003393, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 23116.639061599373, "Quartile2MutualInformation": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2SkewnessOfNumericAtts": 1.4466644733818423, 
"MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -0.8534788731977669, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 29.373410802390154, "MaxKurtosisOfNumericAtts": 285.07781969737005, "MinMeansOfNumericAtts": -122.21389640494147, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "MaxMeansOfNumericAtts": 540088.1417665279, "MinMutualInformation": null, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 34.58554043194096, "MaxMutualInformation": null, "MinNominalAttDistinctValues": null, "PercentageOfNumericFeatures": 95, "Quartile3MeansOfNumericAtts": 2079.8997362698333, "MaxNominalAttDistinctValues": null, "MinSkewnessOfNumericAtts": -0.48527047653794136, "PercentageOfSymbolicFeatures": 0, "Quartile3MutualInformation": null, "MaxSkewnessOfNumericAtts": 13.060018959032032, "MinStdDevOfNumericAtts": 0.0865171977279032, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 4.024069144684663, "MaxStdDevOfNumericAtts": 367127.19648269983, "MinorityClassPercentage": null, "Quartile1KurtosisOfNumericAtts": 0.5257635652838322, "Quartile3StdDevOfNumericAtts": 828.0909776519135, "MeanAttributeEntropy": null, "MinorityClassSize": null, "Quartile1MeansOfNumericAtts": 2.1147573219821583, "StdvNominalAttDistinctValues": null, "MeanKurtosisOfNumericAtts": 36.371001790301726, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": null, "MeanMeansOfNumericAtts": 35483.52096623635, "Quartile1SkewnessOfNumericAtts": 0.6161767211575476, "AutoCorrelation": null, "MeanMutualInformation": null, "Quartile1StdDevOfNumericAtts": 0.6507430463662671 }, "tags": [ { "uploader": "38960", "tag": "Agriculture" }, { "uploader": "38960", "tag": "Economics" } ], "features": [ { "name": "id", "index": "0", "type": "numeric", "distinct": "21436", "missing": "0", "identifier": "1", "min": "1000102", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "date", "index": "1", "type": 
"string", "distinct": "372", "missing": "0" }, { "name": "price", "index": "2", "type": "numeric", "distinct": "4028", "missing": "0", "min": "75000", "max": "7700000", "mean": "540088", "stdev": "367127" }, { "name": "bedrooms", "index": "3", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "33", "mean": "3", "stdev": "1" }, { "name": "bathrooms", "index": "4", "type": "numeric", "distinct": "30", "missing": "0", "min": "0", "max": "8", "mean": "2", "stdev": "1" }, { "name": "sqft_living", "index": "5", "type": "numeric", "distinct": "1038", "missing": "0", "min": "290", "max": "13540", "mean": "2080", "stdev": "918" }, { "name": "sqft_lot", "index": "6", "type": "numeric", "distinct": "9782", "missing": "0", "min": "520", "max": "1651359", "mean": "15107", "stdev": "41421" }, { "name": "floors", "index": "7", "type": "numeric", "distinct": "6", "missing": "0", "min": "1", "max": "4", "mean": "1", "stdev": "1" }, { "name": "waterfront", "index": "8", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "view", "index": "9", "type": "numeric", "distinct": "5", "missing": "0", "min": "0", "max": "4", "mean": "0", "stdev": "1" }, { "name": "condition", "index": "10", "type": "numeric", "distinct": "5", "missing": "0", "min": "1", "max": "5", "mean": "3", "stdev": "1" }, { "name": "grade", "index": "11", "type": "numeric", "distinct": "12", "missing": "0", "min": "1", "max": "13", "mean": "8", "stdev": "1" }, { "name": "sqft_above", "index": "12", "type": "numeric", "distinct": "946", "missing": "0", "min": "290", "max": "9410", "mean": "1788", "stdev": "828" }, { "name": "sqft_basement", "index": "13", "type": "numeric", "distinct": "306", "missing": "0", "min": "0", "max": "4820", "mean": "292", "stdev": "443" }, { "name": "yr_built", "index": "14", "type": "numeric", "distinct": "116", "missing": "0", "min": "1900", "max": "2015", "mean": "1971", "stdev": "29" }, { "name": 
"yr_renovated", "index": "15", "type": "numeric", "distinct": "70", "missing": "0", "min": "0", "max": "2015", "mean": "84", "stdev": "402" }, { "name": "zipcode", "index": "16", "type": "numeric", "distinct": "70", "missing": "0", "min": "98001", "max": "98199", "mean": "98078", "stdev": "54" }, { "name": "lat", "index": "17", "type": "numeric", "distinct": "5034", "missing": "0", "min": "47", "max": "48", "mean": "48", "stdev": "0" }, { "name": "long", "index": "18", "type": "numeric", "distinct": "752", "missing": "0", "min": "-123", "max": "0", "mean": "-122", "stdev": "0" }, { "name": "sqft_living15", "index": "19", "type": "numeric", "distinct": "777", "missing": "0", "min": "399", "max": "6210", "mean": "1987", "stdev": "685" }, { "name": "sqft_lot15", "index": "20", "type": "numeric", "distinct": "8689", "missing": "0", "min": "651", "max": "871200", "mean": "12768", "stdev": "27304" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }