{ "data_id": "43306", "name": "Used-Cars-Dataset", "exact_name": "Used-Cars-Dataset", "version": 2, "version_label": "v1.0", "description": "Context\nCraigslist is the world's largest collection of used vehicles for sale, yet it's very difficult to collect all of them in the same place. I built a scraper for a school project and expanded upon it later to create this dataset which includes every used vehicle entry within the United States on Craigslist.\n\nContent\nThis data is scraped every few months, it contains most all relevant information that Craigslist provides on car sales including columns like price, condition, manufacturer, latitude\/longitude, and 18 other categories. For ML projects, consider feature engineering on location columns such as long\/lat. For previous listings, check older versions of the dataset.\n\nSee https:\/\/github.com\/AustinReese\/UsedVehicleSearch", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 00:44:25", "update_comment": null, "last_update": "2022-03-23 00:44:25", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102131\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"id\"", "runs": 0, "suggest": { "input": [ "Used-Cars-Dataset", "Context Craigslist is the world's largest collection of used vehicles for sale, yet it's very difficult to collect all of them in the same place. I built a scraper for a school project and expanded upon it later to create this dataset which includes every used vehicle entry within the United States on Craigslist. Content This data is scraped every few months, it contains most all relevant information that Craigslist provides on car sales including columns like price, condition, manufacturer, lat " ], "weight": 5 }, "qualities": { "NumberOfInstances": 426880, "NumberOfFeatures": 25, "NumberOfClasses": null, "NumberOfMissingValues": 1655336, "NumberOfInstancesWithMissingValues": 426880, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 0, "Dimensionality": 5.856446776611694e-5, "PercentageOfNumericFeatures": 24, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 100, "AutoCorrelation": null, "PercentageOfMissingValues": 15.511019490254874 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "id", "index": "0", "type": "numeric", "distinct": "426880", "missing": "0", "ignore": "1", "min": "2147483647", "max": "2147483647", "mean": "2147483647", "stdev": "4473170" }, { "name": "url", "index": "1", "type": "string", "distinct": "426880", "missing": "0" }, { "name": "region", "index": "2", "type": "string", "distinct": "404", "missing": "0" }, { "name": "region_url", "index": "3", "type": "string", "distinct": "413", "missing": "0" }, { "name": "price", "index": "4", "type": "numeric", "distinct": "15655", "missing": "0", "min": "0", "max": "2147483647", "mean": "75199", "stdev": "12182282" }, { "name": "year", "index": "5", "type": "numeric", "distinct": "114", "missing": "1205", "min": "1900", "max": "2022", "mean": "2011", "stdev": "9" }, { "name": "manufacturer", "index": "6", "type": "string", "distinct": "42", "missing": "17646" }, { "name": "model", "index": "7", "type": "string", "distinct": "29667", "missing": "5277" }, { "name": "condition", "index": "8", "type": "string", "distinct": "6", "missing": "174104" }, { "name": "cylinders", "index": "9", "type": "string", "distinct": "8", "missing": "177678" }, { "name": "fuel", "index": "10", "type": "string", "distinct": "5", "missing": "3013" }, { "name": "odometer", "index": "11", "type": "numeric", "distinct": "104870", "missing": "4400", "min": "0", "max": "10000000", "mean": "98043", "stdev": "213882" }, { "name": "title_status", "index": "12", "type": "string", "distinct": "6", "missing": "8242" }, { "name": "transmission", "index": "13", "type": "string", "distinct": "3", "missing": "2556" }, { "name": "VIN", "index": "14", "type": "string", "distinct": "118264", "missing": "161042" }, { "name": "drive", "index": "15", "type": "string", "distinct": "3", "missing": "130567" }, { "name": "size", "index": "16", "type": "string", "distinct": "4", "missing": "306361" }, { "name": "type", "index": "17", "type": "string", "distinct": "13", "missing": "92858" }, { "name": "paint_color", "index": "18", "type": "string", "distinct": "12", "missing": "130203" }, { "name": "image_url", "index": "19", "type": "string", "distinct": "241899", "missing": "68" }, { "name": "description", "index": "20", "type": "string", "distinct": "360911", "missing": "70" }, { "name": "county", "index": "21", "type": "numeric", "distinct": "0", "missing": "426880", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "state", "index": "22", "type": "string", "distinct": "51", "missing": "0" }, { "name": "lat", "index": "23", "type": "numeric", "distinct": "53181", "missing": "6549", "min": "-84", "max": "82", "mean": "38", "stdev": "6" }, { "name": "long", "index": "24", "type": "numeric", "distinct": "53772", "missing": "6549", "min": "-160", "max": "174", "mean": "-95", "stdev": "18" }, { "name": "posting_date", "index": "25", "type": "string", "distinct": "381536", "missing": "68" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }