{ "data_id": "46240", "name": "BitInfoCharts-wo-tweets-preprocessed", "exact_name": "BitInfoCharts-wo-tweets-preprocessed", "version": 1, "version_label": null, "description": "Bitcoin data scraped from BitInfoCharts, without 'tweets' and with preprocessing.\n\nSeveral Bitcoin-related data scraped directly from BitInfoCharts. 'date' in the format %Y-%m-%d.\nThe 'tweets' column was dropped due to too many NaN values (values only between 2014-04-09 and 2023-03-14).\nBesides, we have only kept the rows between the max(dates with non NaN values of each column) and min(dates with non NaN values of each column), which\nleaves us with dates between 2011-04-14 and 2024-05-26.\n\nThere are 21 columns:\n\nid_series: The id of the time series.\n\ndate: The date of the time series in the format \"%Y-%m-%d\".\n\ntime_step: The time step in the time series.\n\nvalue_X (X from 0 to 17): The values of the time series, which will be used for the forecasting task.\n\nPreprocessing:\n\n1 - Renamed columns to 'date' and 'value_X' with X from 0 to 17 (number of columns of original dataset).\n\n2 - Created columns 'time_step' and 'id_series'. 
There is only one 'id_series' (0).\n\n3 - Ensured that there are no missing dates and that the frequency of the time_series is daily.\n\n4 - Filled NaN values by propagating the last valid observation forward to the next valid one (ffill).\n\nThe columns with some missing values were:\n'median_transaction_fee': 'value_9'\n'confirmationtime': 'value_10'\n'activeaddresses': 'value_15'\n'top100cap': 'value_16'\n\n5 - Cast 'date' to str, 'time_step' to int, 'value_X' to float, and defined 'id_series' as 'category'.", "format": "arff", "uploader": "Bruno Belucci Teixeira", "uploader_id": 30703, "visibility": "public", "creator": "\"BitInfoCharts\"", "contributor": "\"Bruno Belucci\"", "date": "2024-06-25 00:45:50", "update_comment": null, "last_update": "2024-06-25 00:45:50", "licence": "Public Domain", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22120704\/dataset", "kaggle_url": null, "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "BitInfoCharts-wo-tweets-preprocessed", "Bitcoin data scraped from BitInfoCharts, without 'tweets' and with preprocessing. Several Bitcoin-related data scraped directly from BitInfoCharts. 'date' in the format %Y-%m-%d. The 'tweets' column was dropped due to too many NaN values (values only between 2014-04-09 and 2023-03-14). Besides, we have only kept the rows between the max(dates with non NaN values of each column) and min(dates with non NaN values of each column), which leaves us with dates between 2011-04-14 and 2024-05-26. 
There " ], "weight": 5 }, "qualities": { "NumberOfInstances": 4792, "NumberOfFeatures": 21, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 19, "NumberOfSymbolicFeatures": 1, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": null, "PercentageOfNumericFeatures": 90.47619047619048, "Dimensionality": 0.004382303839732888, "PercentageOfSymbolicFeatures": 4.761904761904762, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0 }, "tags": [], "features": [ { "name": "id_series", "index": "0", "type": "nominal", "distinct": "1", "missing": "0", "distr": [] }, { "name": "date", "index": "1", "type": "string", "distinct": "4792", "missing": "0" }, { "name": "value_0", "index": "2", "type": "numeric", "distinct": "4745", "missing": "0", "min": "1768", "max": "926842", "mean": "211036", "stdev": "138616" }, { "name": "value_1", "index": "3", "type": "numeric", "distinct": "4761", "missing": "0", "min": "4342", "max": "998092", "mean": "567936", "stdev": "309866" }, { "name": "value_2", "index": "4", "type": "numeric", "distinct": "4773", "missing": "0", "min": "3033", "max": "842802", "mean": "308651", "stdev": "189733" }, { "name": "value_3", "index": "5", "type": "numeric", "distinct": "726", "missing": "0", "min": "82346", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "value_4", "index": "6", "type": "numeric", "distinct": "4792", "missing": "0", "min": "2147483647", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "value_5", "index": "7", "type": "numeric", "distinct": "4628", "missing": "0", "min": "1", "max": "72677", "mean": "12134", "stdev": "17161" }, { "name": "value_6", "index": "8", "type": "numeric", "distinct": "2957", "missing": "0", "min": "0", "max": 
"55839", "mean": "1091", "stdev": "3938" }, { "name": "value_7", "index": "9", "type": "numeric", "distinct": "4792", "missing": "0", "min": "100575", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "value_8", "index": "10", "type": "numeric", "distinct": "3238", "missing": "0", "min": "0", "max": "128", "mean": "2", "stdev": "6" }, { "name": "value_9", "index": "11", "type": "numeric", "distinct": "2414", "missing": "0", "min": "0", "max": "92", "mean": "1", "stdev": "3" }, { "name": "value_10", "index": "12", "type": "numeric", "distinct": "159", "missing": "0", "min": "5", "max": "25", "mean": "10", "stdev": "1" }, { "name": "value_11", "index": "13", "type": "numeric", "distinct": "4792", "missing": "0", "min": "5732314", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "value_12", "index": "14", "type": "numeric", "distinct": "4624", "missing": "0", "min": "49", "max": "2450460", "mean": "91637", "stdev": "200231" }, { "name": "value_13", "index": "15", "type": "numeric", "distinct": "4748", "missing": "0", "min": "0", "max": "5417", "mean": "309", "stdev": "399" }, { "name": "value_14", "index": "16", "type": "numeric", "distinct": "3572", "missing": "0", "min": "0", "max": "617", "mean": "37", "stdev": "51" }, { "name": "value_15", "index": "17", "type": "numeric", "distinct": "4760", "missing": "0", "min": "4047", "max": "1196069", "mean": "484175", "stdev": "311476" }, { "name": "value_16", "index": "18", "type": "numeric", "distinct": "3362", "missing": "0", "min": "13", "max": "2245646", "mean": "855", "stdev": "33903" }, { "name": "value_17", "index": "19", "type": "numeric", "distinct": "3241", "missing": "0", "min": "0", "max": "75", "mean": "4", "stdev": "5" }, { "name": "time_step", "index": "20", "type": "numeric", "distinct": "4792", "missing": "0", "min": "0", "max": "4791", "mean": "2396", "stdev": "1383" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 
0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }