{ "data_id": "43822", "name": "Perth-House-Prices", "exact_name": "Perth-House-Prices", "version": 1, "version_label": "v1.0", "description": "Acknowledgements\nThis data was scraped from http:\/\/house.speakingsame.com\/ and includes data from 322 Perth suburbs, resulting in an average of about 100 rows per suburb.\nContent\nI believe the columns chosen to represent this dataset are the most crucial in predicting house prices. Some preliminary analysis I conducted showed a significant correlation between each of these columns and the response variable (i.e. price). \nData obtained from other than scrape source\nLongitude and Latitude data was obtained from data.gov.au.\nSchool ranking data was obtained from bettereducation.\nThe nearest schools to each address selected in this dataset are schools which are defined to be 'ATAR-applicable'. In the Australian secondary school education system, ATAR is a scoring system used to assess a student's culminative academic results and is used for entry into Australian universities. As such, schools which do not have an ATAR program such as primary schools, vocational schools, special needs schools etc. are not considered in determining the nearest school.\nDo also note that under the \"NEAREST_SCH_RANK\" column, there are some missing rows as some schools are unranked according to this criteria by bettereducation.", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 15:30:43", "update_comment": null, "last_update": "2022-03-24 15:30:43", "licence": "CC BY-NC-SA 4.0", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102647\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"ADDRESS\"", "runs": 0, "suggest": { "input": [ "Perth-House-Prices", "Acknowledgements This data was scraped from http:\/\/house.speakingsame.com\/ and includes data from 322 Perth suburbs, resulting in an average of about 100 rows per suburb. Content I believe the columns chosen to represent this dataset are the most crucial in predicting house prices. Some preliminary analysis I conducted showed a significant correlation between each of these columns and the response variable (i.e. price). Data obtained from other than scrape source Longitude and Latitude data was " ], "weight": 5 }, "qualities": { "NumberOfInstances": 33656, "NumberOfFeatures": 18, "NumberOfClasses": null, "NumberOfMissingValues": 16585, "NumberOfInstancesWithMissingValues": 14448, "NumberOfNumericFeatures": 14, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0005348229141906347, "PercentageOfNumericFeatures": 77.77777777777779, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 42.92845257903494, "AutoCorrelation": null, "PercentageOfMissingValues": 2.73766605921348 }, "tags": [ { "uploader": "38960", "tag": "Agriculture" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "ADDRESS", "index": "0", "type": "string", "distinct": "33566", "missing": "0", "ignore": "1" }, { "name": "SUBURB", "index": "1", "type": "string", "distinct": "321", "missing": "0" }, { "name": "PRICE", "index": "2", "type": "numeric", "distinct": "2297", "missing": "0", "min": "51000", "max": "2440000", "mean": "637072", "stdev": "355826" }, { "name": "BEDROOMS", "index": "3", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "4", "stdev": "1" }, { "name": "BATHROOMS", "index": "4", "type": "numeric", "distinct": "8", "missing": "0", "min": "1", "max": "16", "mean": "2", "stdev": "1" }, { "name": "GARAGE", "index": "5", "type": "numeric", "distinct": "25", "missing": "2478", "min": "1", "max": "99", "mean": "2", "stdev": "1" }, { "name": "LAND_AREA", "index": "6", "type": "numeric", "distinct": "4372", "missing": "0", "min": "61", "max": "999999", "mean": "2741", "stdev": "16694" }, { "name": "FLOOR_AREA", "index": "7", "type": "numeric", "distinct": "528", "missing": "0", "min": "1", "max": "870", "mean": "184", "stdev": "72" }, { "name": "BUILD_YEAR", "index": "8", "type": "numeric", "distinct": "124", "missing": "3155", "min": "1868", "max": "2017", "mean": "1990", "stdev": "21" }, { "name": "CBD_DIST", "index": "9", "type": "numeric", "distinct": "595", "missing": "0", "min": "681", "max": "59800", "mean": "19777", "stdev": "11364" }, { "name": "NEAREST_STN", "index": "10", "type": "string", "distinct": "68", "missing": "0" }, { "name": "NEAREST_STN_DIST", "index": "11", "type": "numeric", "distinct": "1189", "missing": "0", "min": "46", "max": "35500", "mean": "4523", "stdev": "4495" }, { "name": "DATE_SOLD", "index": "12", "type": "string", "distinct": "350", "missing": "0" }, { "name": "POSTCODE", "index": "13", "type": "numeric", "distinct": "114", "missing": "0", "min": "6003", "max": "6558", "mean": "6089", "stdev": "62" }, { "name": "LATITUDE", "index": "14", "type": "numeric", "distinct": "29707", "missing": "0", "min": "-32", "max": "0", "mean": "-32", "stdev": "0" }, { "name": "LONGITUDE", "index": "15", "type": "numeric", "distinct": "28557", "missing": "0", "min": "116", "max": "116", "mean": "116", "stdev": "0" }, { "name": "NEAREST_SCH", "index": "16", "type": "string", "distinct": "160", "missing": "0" }, { "name": "NEAREST_SCH_DIST", "index": "17", "type": "numeric", "distinct": "33318", "missing": "0", "min": "0", "max": "23", "mean": "2", "stdev": "2" }, { "name": "NEAREST_SCH_RANK", "index": "18", "type": "numeric", "distinct": "103", "missing": "10952", "min": "1", "max": "139", "mean": "73", "stdev": "41" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }