{ "data_id": "43475", "name": "Trulia-Property-Listing-Dataset-2020", "exact_name": "Trulia-Property-Listing-Dataset-2020", "version": 1, "version_label": "v1.0", "description": "Context\nThis dataset was created by our in house Web Scraping and Data Mining teams at PromptCloud and DataStock. You can download the full dataset here. This sample contains 30K records.\nContent\nThis dataset contains the following: \nTotal Records Count: 78519 Domain Name: trulia.com Date Range :: 01st Jan 2020 - 31st Jan 2020 File Extension : csv\nAvailable Fields:: Uniq Id, Crawl Timestamp, Url, Title, Description, Price, Image Url, Style, Sqr Ft, Longitude, Latitude, Home Id, Lot Size, Beds, Bath, Year Built, Price Sqr Ft, Features, Last Sold Year, Last Sold For, Last Tax Assestment, Last Tax Year, Address, City, State, Zipcode, Property Type, Address Full, Facts, Days On Trulia, Listing Agent Name, Listing Agent Contact No, Agent Name 1, Agent Contact No 1, Agent Name 2, Agent Contact No 2, Agent Name 3, Agent Contact No 3, Agent Name 4, Agent Contact No 4, Agent Name 5, Agent Contact No 5, Brokername, Image 1, Image 2, Image 3, Image 4, Image 5, Image 6, Image 7, Image 8, Image 9, Image 10, Image 11, Image 12, Image 13, Image 14, Image 15, Image 16, Image 17, Image 18, Image 19, Image 20, Image 21, Image 22, Image 23, Image 24, Image 25\nAcknowledgements\nWe wouldn't be here without the help of our in house web scraping and data mining teams at PromptCloud and DataStock.\nInspiration\nThis dataset was created keeping in mind our data scientists and researchers across the world.", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:25:38", "update_comment": null, "last_update": "2022-03-23 13:25:38", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102300\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Trulia-Property-Listing-Dataset-2020", "Context This dataset was created by our in house Web Scraping and Data Mining teams at PromptCloud and DataStock. You can download the full dataset here. This sample contains 30K records. Content This dataset contains the following: Total Records Count: 78519 Domain Name: trulia.com Date Range :: 01st Jan 2020 - 31st Jan 2020 File Extension : csv Available Fields:: Uniq Id, Crawl Timestamp, Url, Title, Description, Price, Image Url, Style, Sqr Ft, Longitude, Latitude, Home Id, Lot Size, Beds, Ba " ], "weight": 5 }, "qualities": { "NumberOfInstances": 5000, "NumberOfFeatures": 68, "NumberOfClasses": null, "NumberOfMissingValues": 129439, "NumberOfInstancesWithMissingValues": 5000, "NumberOfNumericFeatures": 24, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0136, "PercentageOfNumericFeatures": 35.294117647058826, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 100, "AutoCorrelation": null, "PercentageOfMissingValues": 38.07029411764706 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "Uniq_Id", "index": "0", "type": "string", "distinct": "5000", "missing": "0" }, { "name": "Crawl_Timestamp", "index": "1", "type": "string", "distinct": "4551", "missing": "0" }, { "name": "Url", "index": "2", "type": "string", "distinct": "5000", "missing": "0" }, { "name": "Title", "index": "3", "type": "string", "distinct": "4998", "missing": "0" }, { "name": "Description", "index": "4", "type": "string", "distinct": "4962", "missing": "0" }, { "name": "Price", "index": "5", "type": "string", "distinct": "1782", "missing": "0" }, { "name": "Image_Url", "index": "6", "type": "string", "distinct": "4737", "missing": "253" }, { "name": "Style", "index": "7", "type": "string", "distinct": "108", "missing": "288" }, { "name": "Sqr_Ft", "index": "8", "type": "string", "distinct": "2220", "missing": "283" }, { "name": "Longitude", "index": "9", "type": "numeric", "distinct": "4901", "missing": "0", "min": "-150", "max": "0", "mean": "-94", "stdev": "15" }, { "name": "Latitude", "index": "10", "type": "numeric", "distinct": "4908", "missing": "0", "min": "26", "max": "61", "mean": "36", "stdev": "6" }, { "name": "Home_Id", "index": "11", "type": "numeric", "distinct": "4999", "missing": "0", "min": "111743", "max": "2147483647", "mean": "2147483647", "stdev": "1368314108" }, { "name": "Lot_Size", "index": "12", "type": "string", "distinct": "2053", "missing": "762" }, { "name": "Beds", "index": "13", "type": "numeric", "distinct": "17", "missing": "335", "min": "1", "max": "36", "mean": "3", "stdev": "1" }, { "name": "Bath", "index": "14", "type": "numeric", "distinct": "28", "missing": "311", "min": "1", "max": "36", "mean": "2", "stdev": "1" }, { "name": "Year_Built", "index": "15", "type": "numeric", "distinct": "144", "missing": "500", "min": "1636", "max": "2020", "mean": "1973", "stdev": "34" }, { "name": "Price_Sqr_Ft", "index": "16", "type": "string", "distinct": "769", "missing": "476" }, { "name": "Features", "index": "17", "type": "string", "distinct": "4999", "missing": "0" }, { "name": "Last_Sold_Year", "index": "18", "type": "numeric", "distinct": "28", "missing": "2583", "min": "1993", "max": "2020", "mean": "2012", "stdev": "7" }, { "name": "Last_Sold_For", "index": "19", "type": "string", "distinct": "1148", "missing": "2583" }, { "name": "Last_Tax_Assestment", "index": "20", "type": "string", "distinct": "2807", "missing": "1915" }, { "name": "Last_Tax_Year", "index": "21", "type": "numeric", "distinct": "10", "missing": "1923", "min": "2009", "max": "2019", "mean": "2018", "stdev": "1" }, { "name": "Address", "index": "22", "type": "string", "distinct": "4998", "missing": "0" }, { "name": "City", "index": "23", "type": "string", "distinct": "401", "missing": "0" }, { "name": "State", "index": "24", "type": "string", "distinct": "34", "missing": "0" }, { "name": "Zipcode", "index": "25", "type": "numeric", "distinct": "1879", "missing": "0", "min": "2109", "max": "99587", "mean": "57725", "stdev": "27223" }, { "name": "Property_Type", "index": "26", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Address_Full", "index": "27", "type": "string", "distinct": "4998", "missing": "0" }, { "name": "Facts", "index": "28", "type": "string", "distinct": "3904", "missing": "1072" }, { "name": "Days_On_Trulia", "index": "29", "type": "numeric", "distinct": "117", "missing": "850", "min": "2", "max": "180", "mean": "24", "stdev": "30" }, { "name": "Listing_Agent_Name", "index": "30", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Listing_Agent_Contact_No", "index": "31", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Name_1", "index": "32", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Contact_No_1", "index": "33", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Name_2", "index": "34", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Contact_No_2", "index": "35", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Name_3", "index": "36", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Contact_No_3", "index": "37", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Name_4", "index": "38", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Contact_No_4", "index": "39", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Name_5", "index": "40", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Agent_Contact_No_5", "index": "41", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Brokername", "index": "42", "type": "numeric", "distinct": "0", "missing": "5000", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Image_1", "index": "43", "type": "string", "distinct": "4737", "missing": "253" }, { "name": "Image_2", "index": "44", "type": "string", "distinct": "4426", "missing": "564" }, { "name": "Image_3", "index": "45", "type": "string", "distinct": "4284", "missing": "707" }, { "name": "Image_4", "index": "46", "type": "string", "distinct": "4171", "missing": "820" }, { "name": "Image_5", "index": "47", "type": "string", "distinct": "4057", "missing": "935" }, { "name": "Image_6", "index": "48", "type": "string", "distinct": "3897", "missing": "1095" }, { "name": "Image_7", "index": "49", "type": "string", "distinct": "3807", "missing": "1185" }, { "name": "Image_8", "index": "50", "type": "string", "distinct": "3706", "missing": "1286" }, { "name": "Image_9", "index": "51", "type": "string", "distinct": "3627", "missing": "1366" }, { "name": "Image_10", "index": "52", "type": "string", "distinct": "3531", "missing": "1462" }, { "name": "Image_11", "index": "53", "type": "string", "distinct": "3418", "missing": "1577" }, { "name": "Image_12", "index": "54", "type": "string", "distinct": "3312", "missing": "1684" }, { "name": "Image_13", "index": "55", "type": "string", "distinct": "3206", "missing": "1790" }, { "name": "Image_14", "index": "56", "type": "string", "distinct": "3094", "missing": "1902" }, { "name": "Image_15", "index": "57", "type": "string", "distinct": "2966", "missing": "2031" }, { "name": "Image_16", "index": "58", "type": "string", "distinct": "2868", "missing": "2129" }, { "name": "Image_17", "index": "59", "type": "string", "distinct": "2737", "missing": "2260" }, { "name": "Image_18", "index": "60", "type": "string", "distinct": "2636", "missing": "2361" }, { "name": "Image_19", "index": "61", "type": "string", "distinct": "2515", "missing": "2482" }, { "name": "Image_20", "index": "62", "type": "string", "distinct": "2419", "missing": "2578" }, { "name": "Image_21", "index": "63", "type": "string", "distinct": "2286", "missing": "2711" }, { "name": "Image_22", "index": "64", "type": "string", "distinct": "2158", "missing": "2838" }, { "name": "Image_23", "index": "65", "type": "string", "distinct": "2031", "missing": "2966" }, { "name": "Image_24", "index": "66", "type": "string", "distinct": "1912", "missing": "3085" }, { "name": "Image_25", "index": "67", "type": "string", "distinct": "1759", "missing": "3238" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }