{ "data_id": "44065", "name": "nyc-taxi-green-dec-2016", "exact_name": "nyc-taxi-green-dec-2016", "version": 9, "version_label": null, "description": "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, \n transformed in the same way. This dataset belongs to the \"regression on categorical and\n numerical features\" benchmark. Original description: \n \nString datetime information extracted to numeric columns.Trip Record Data provided by the New York City Taxi and Limousine Commission (TLC) [http:\/\/www.nyc.gov\/html\/tlc\/html\/about\/trip_record_data.shtml]. The dataset includes TLC trips of the green line in December 2016. Data was downloaded on 03.11.2018. For a description of all variables in the dataset checkout the TLC homepage [http:\/\/www.nyc.gov\/html\/tlc\/downloads\/pdf\/data_dictionary_trip_records_green.pdf]. The variable 'tip_amount' was chosen as target variable. The variable 'total_amount' is ignored by default, otherwise the target could be predicted deterministically. The date variables 'lpep_pickup_datetime' and 'lpep_dropoff_datetime' (ignored by default) could be used to compute additional time features. In this version, we chose only trips with 'payment_type' == 1 (credit card), as tips are not included for most other payment types. We also removed the variables 'trip_distance' and 'fare_amount' to increase the importance of the categorical features 'PULocationID' and 'DOLocationID'.", "format": "arff", "uploader": "Leo Grin", "uploader_id": 26324, "visibility": "public", "creator": null, "contributor": "\"Leo Grin\"", "date": "2022-06-21 10:33:33", "update_comment": null, "last_update": "2022-06-21 10:33:33", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/old.openml.org\/data\/download\/22103161\/dataset", "default_target_attribute": "tip_amount", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "nyc-taxi-green-dec-2016", "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"regression on categorical and numerical features\" benchmark. Original description: String datetime information extracted to numeric columns.Trip Record Data provided by the New York City Taxi and Limousine Commission (TLC) [http:\/\/www.nyc.gov\/html\/tlc\/html\/about\/trip_record_data.shtml]. The dataset includes TLC trips of the green line in December 2016 " ], "weight": 5 }, "qualities": { "NumberOfInstances": 581835, "NumberOfFeatures": 17, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 10, "NumberOfSymbolicFeatures": 7, "PercentageOfBinaryFeatures": 17.647058823529413, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": 0.3560496958964945, "PercentageOfNumericFeatures": 58.82352941176471, "Dimensionality": 2.9217905419921456e-5, "PercentageOfSymbolicFeatures": 41.17647058823529, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 3 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Mathematics" } ], "features": [ { "name": "tip_amount", "index": "16", "type": "numeric", "distinct": "1811", "missing": "0", "target": "1", "min": "-2", "max": "6", "mean": "1", "stdev": "1" }, { "name": "VendorID", "index": "0", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "store_and_fwd_flag", "index": "1", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "RatecodeID", "index": "2", "type": "nominal", "distinct": "5", "missing": "0", "distr": [] }, { "name": "passenger_count", "index": "3", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "9", "mean": "1", "stdev": "1" }, { "name": "extra", "index": "4", "type": "nominal", "distinct": "5", "missing": "0", "distr": [] }, { "name": "mta_tax", "index": "5", "type": "nominal", "distinct": "3", "missing": "0", "distr": [] }, { "name": "tolls_amount", "index": "6", "type": "numeric", "distinct": "105", "missing": "0", "min": "0", "max": "98", "mean": "0", "stdev": "1" }, { "name": "improvement_surcharge", "index": "7", "type": "nominal", "distinct": "3", "missing": "0", "distr": [] }, { "name": "total_amount", "index": "8", "type": "numeric", "distinct": "5377", "missing": "0", "min": "-63", "max": "712", "mean": "17", "stdev": "12" }, { "name": "trip_type", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "lpep_pickup_datetime_day", "index": "10", "type": "numeric", "distinct": "31", "missing": "0", "min": "1", "max": "31", "mean": "15", "stdev": "9" }, { "name": "lpep_pickup_datetime_hour", "index": "11", "type": "numeric", "distinct": "24", "missing": "0", "min": "0", "max": "23", "mean": "14", "stdev": "7" }, { "name": "lpep_pickup_datetime_minute", "index": "12", "type": "numeric", "distinct": "60", "missing": "0", "min": "0", "max": "59", "mean": "30", "stdev": "17" }, { "name": "lpep_dropoff_datetime_day", "index": "13", "type": "numeric", "distinct": "31", "missing": "0", "min": "1", "max": "31", "mean": "15", "stdev": "9" }, { "name": "lpep_dropoff_datetime_hour", "index": "14", "type": "numeric", "distinct": "24", "missing": "0", "min": "0", "max": "23", "mean": "14", "stdev": "7" }, { "name": "lpep_dropoff_datetime_minute", "index": "15", "type": "numeric", "distinct": "60", "missing": "0", "min": "0", "max": "59", "mean": "30", "stdev": "17" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }