{ "data_id": "43584", "name": "New-York-Taxi-Trip-enriched-by-Mathematica", "exact_name": "New-York-Taxi-Trip-enriched-by-Mathematica", "version": 1, "version_label": "v1.0", "description": "Context\nThis data set was created to help Kaggle users in the New York City Taxi Trip Duration competition. New features were generated using Wolfram Mathematica system.\nHope that this data set will help both young and experienced researchers in their data mastering path.\nAll sources can be found here.\nContent\nGiven dataset consists of both features from initial dataset and generated via Wolfram Mathematica computational system. Thus, all features can be split into following groups:\n\nInitial features (extracted from initial data),\nCalendar features (consisting of season, day name and day period),\nWeather features (information about temperature, snow, and rain),\nTravel features (geo distance with estimated driving distance and time).\n\nDataset contains the following columns:\n\nid - a unique identifier for each trip,\nvendorId - a code indicating the provider associated with the trip record,\npassengerCount - the number of passengers in the vehicle (driver entered value),\nyear,\nmonth,\nday,\nhour,\nminute,\nsecond,\nseason,\ndayName,\ndayPeriod - day period, e.g. 
late night, morning, etc.,\ntemperature,\nrain,\nsnow,\nstartLatitude,\nstartLongitude,\nendLatitude,\nendLongitude,\nflag - this flag indicates whether the trip record was held in vehicle memory before sending to the vendor because the vehicle did not have a connection to the server - Y=store and forward; N=not a store and forward trip,\ndrivingDistance - driving distance, estimated via Wolfram Mathematica system,\ndrivingTime - driving time, estimated via Wolfram Mathematica system,\ngeoDistance - distance between starting and ending points,\ntripDuration - duration of the trip in seconds (value -1 indicates test rows).", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 00:15:28", "update_comment": null, "last_update": "2022-03-24 00:15:28", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102409\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "New-York-Taxi-Trip-enriched-by-Mathematica", "Context This data set was created to help Kaggle users in the New York City Taxi Trip Duration competition. New features were generated using Wolfram Mathematica system. Hope that this data set will help both young and experienced researchers in their data mastering path. All sources can be found here. Content Given dataset consists of both features from initial dataset and generated via Wolfram Mathematica computational system. 
Thus, all features can be split into following groups: Initial feat " ], "weight": 5 }, "qualities": { "NumberOfInstances": 2083778, "NumberOfFeatures": 24, "NumberOfClasses": null, "NumberOfMissingValues": 6810, "NumberOfInstancesWithMissingValues": 4847, "NumberOfNumericFeatures": 19, "NumberOfSymbolicFeatures": 0, "Dimensionality": 1.1517541695900428e-5, "PercentageOfNumericFeatures": 79.16666666666666, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0.23260635250012238, "AutoCorrelation": null, "PercentageOfMissingValues": 0.013617093567548942 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Mathematics" } ], "features": [ { "name": "id", "index": "0", "type": "string", "distinct": "2083778", "missing": "0" }, { "name": "vendorId", "index": "1", "type": "numeric", "distinct": "2", "missing": "0", "min": "1", "max": "2", "mean": "2", "stdev": "0" }, { "name": "passengerCount", "index": "2", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "9", "mean": "2", "stdev": "1" }, { "name": "year", "index": "3", "type": "numeric", "distinct": "1", "missing": "0", "min": "2016", "max": "2016", "mean": "2016", "stdev": "0" }, { "name": "month", "index": "4", "type": "numeric", "distinct": "6", "missing": "0", "min": "1", "max": "6", "mean": "4", "stdev": "2" }, { "name": "day", "index": "5", "type": "numeric", "distinct": "31", "missing": "0", "min": "1", "max": "31", "mean": "15", "stdev": "9" }, { "name": "hour", "index": "6", "type": "numeric", "distinct": "24", "missing": "0", "min": "0", "max": "23", "mean": "14", "stdev": "6" }, { "name": "minute", "index": "7", "type": "numeric", "distinct": "60", "missing": "0", "min": "0", "max": "59", "mean": "30", "stdev": "17" }, { "name": "second", 
"index": "8", "type": "numeric", "distinct": "60", "missing": "0", "min": "0", "max": "59", "mean": "29", "stdev": "17" }, { "name": "season", "index": "9", "type": "string", "distinct": "3", "missing": "0" }, { "name": "dayName", "index": "10", "type": "string", "distinct": "7", "missing": "0" }, { "name": "dayPeriod", "index": "11", "type": "string", "distinct": "5", "missing": "0" }, { "name": "temperature", "index": "12", "type": "numeric", "distinct": "1800606", "missing": "0", "min": "-1031", "max": "48", "mean": "7", "stdev": "53" }, { "name": "rain", "index": "13", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "snow", "index": "14", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "startLatitude", "index": "15", "type": "numeric", "distinct": "48068", "missing": "0", "min": "34", "max": "52", "mean": "41", "stdev": "0" }, { "name": "startLongitude", "index": "16", "type": "numeric", "distinct": "24960", "missing": "0", "min": "-122", "max": "0", "mean": "-74", "stdev": "0" }, { "name": "endLatitude", "index": "17", "type": "numeric", "distinct": "67086", "missing": "0", "min": "32", "max": "49", "mean": "41", "stdev": "0" }, { "name": "endLongitude", "index": "18", "type": "numeric", "distinct": "36977", "missing": "0", "min": "-122", "max": "0", "mean": "-74", "stdev": "0" }, { "name": "flag", "index": "19", "type": "string", "distinct": "2", "missing": "0" }, { "name": "drivingDistance", "index": "20", "type": "numeric", "distinct": "1720327", "missing": "4847", "min": "0", "max": "1210", "mean": "5", "stdev": "5" }, { "name": "drivingTime", "index": "21", "type": "numeric", "distinct": "230", "missing": "1963", "min": "0", "max": "48720", "mean": "244", "stdev": "231" }, { "name": "geoDistance", "index": "22", "type": "numeric", "distinct": "2075378", "missing": "0", "min": "0", "max": "1241", "mean": "118", "stdev": 
"273" }, { "name": "tripDuration", "index": "23", "type": "numeric", "distinct": "7418", "missing": "0", "min": "-1", "max": "3526282", "mean": "671", "stdev": "4404" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }