{ "data_id": "43457", "name": "COVID19-Dataset-with-100-World-Countries", "exact_name": "COVID19-Dataset-with-100-World-Countries", "version": 1, "version_label": "v1.0", "description": "COVID19-Algeria-and-World-Dataset\nA coronavirus dataset with 104 countries constructed from different reliable sources, where each row represents a country, and the columns represent geographic, climate, healthcare, economic, and demographic factors that may contribute to accelerate\/slow the spread of the COVID-19. The assumptions for the different factors are as follows:\n\nGeography: some continents\/areas may be more affected by the disease\nClimate: cold temperatures may promote the spread of the virus\nHealthcare: lack of hospital beds\/doctors may lead to more human losses\nEconomy: weak economies (GDP) have fewer means to fight the disease\nDemography: older populations may be at higher risk of the disease\n\nThe last three columns represent the number of daily tests performed and the total number of cases and deaths reported each day.\nData description\n\nCountries in the dataset by geographic coordinates\n \n\nEurope: 33 countries\nAsia: 28 countries\nAfrica: 21 countries\nNorth America: 11 countries\nSouth America: 8 countries\nOceania: 3 countries\n\nStatistical description of the data\n\nData distribution\n\nDownload\nThe dataset is available in an encoded CSV form on GitHub.\nPython code\nThe Python Jupyter Notebook to read and visualize the data is available on nbviewer.\nData update\nThe dataset is updated every month with the latest numbers of COVID-19 cases, deaths, and tests. The last update was on March 01, 2021. \nData construction\nThe dataset is constructed from different reliable sources, where each row represents a country, and the columns represent geographic, climate, healthcare, economic, and demographic factors that may contribute to accelerate\/slow the spread of the coronavirus. 
Note that we selected only the main factors for which we found data and that other factors can be used. All data were retrieved from the reliable Our World in Data website, except for data on:\n\nContinents: www.kaggle.com\/statchaitya\/country-to-continent\nGeographic-coordinates: www.kaggle.com\/eidanch\/counties-geographic-coordinates\nTemperatures: www.kaggle.com\/berkeleyearth\/climate-change-earth-surface-temperature-data\nShare of the population over 65 years old: https:\/\/data.worldbank.org\/indicator\/SP.POP.65UP.TO.ZS\nGDP\/Capita: https:\/\/data.worldbank.org\/indicator\/NY.GDP.PCAP.CD\n\nCitation\nIf you want to use the dataset please cite the following arXiv paper, more details about the data construction are provided in it.\n@article{belkacem_covid-19_2020,\n title = {COVID-19 data analysis and forecasting: Algeria and the world},\n shorttitle = {COVID-19 data analysis and forecasting},\n journal = {arXiv preprint arXiv:2007.09755},\n author = {Belkacem, Sami},\n year = {2020}\n}\n\nContact\nIf you have any question or suggestion, please contact me at this email address: s.belkacem@usthb.dz", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:22:27", "update_comment": null, "last_update": "2022-03-23 13:22:27", "licence": "Attribution 4.0 International (CC BY 4.0)", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102282\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "COVID19-Dataset-with-100-World-Countries", "COVID19-Algeria-and-World-Dataset A coronavirus dataset with 104 countries constructed from different reliable sources, where each row represents a country, and the columns represent geographic, climate, healthcare, economic, and demographic factors that may contribute to accelerate\/slow the spread of the COVID-19. 
The assumptions for the different factors are as follows: Geography: some continents\/areas may be more affected by the disease Climate: cold temperatures may promote the spread of the " ], "weight": 5 }, "qualities": { "NumberOfInstances": 38472, "NumberOfFeatures": 15, "NumberOfClasses": null, "NumberOfMissingValues": 11759, "NumberOfInstancesWithMissingValues": 9603, "NumberOfNumericFeatures": 12, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.00038989394884591394, "PercentageOfNumericFeatures": 80, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 24.96101060511541, "AutoCorrelation": null, "PercentageOfMissingValues": 2.0376724197684895 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Statistics" } ], "features": [ { "name": "Entity", "index": "0", "type": "string", "distinct": "104", "missing": "0" }, { "name": "Continent", "index": "1", "type": "string", "distinct": "6", "missing": "0" }, { "name": "Latitude", "index": "2", "type": "numeric", "distinct": "104", "missing": "0", "min": "-41", "max": "65", "mean": "24", "stdev": "26" }, { "name": "Longitude", "index": "3", "type": "numeric", "distinct": "104", "missing": "0", "min": "-106", "max": "179", "mean": "20", "stdev": "61" }, { "name": "Average_temperature_per_year", "index": "4", "type": "numeric", "distinct": "28", "missing": "0", "min": "-2", "max": "29", "mean": "18", "stdev": "8" }, { "name": "Hospital_beds_per_1000_people", "index": "5", "type": "numeric", "distinct": "77", "missing": "0", "min": "0", "max": "13", "mean": "3", "stdev": "3" }, { "name": "Medical_doctors_per_1000_people", "index": "6", "type": "numeric", "distinct": "86", "missing": "0", "min": "0", "max": "8", "mean": "2", "stdev": "2" }, { "name": "GDP\/Capita", "index": 
"7", "type": "numeric", "distinct": "104", "missing": "0", "min": "412", "max": "114705", "mean": "19002", "stdev": "22271" }, { "name": "Population", "index": "8", "type": "numeric", "distinct": "104", "missing": "0", "min": "341284", "max": "1339180127", "mean": "48969829", "stdev": "142725119" }, { "name": "Median_age", "index": "9", "type": "numeric", "distinct": "32", "missing": "0", "min": "16", "max": "48", "mean": "33", "stdev": "8" }, { "name": "Population_aged_65_and_over_(%)", "index": "10", "type": "numeric", "distinct": "23", "missing": "0", "min": "1", "max": "28", "mean": "11", "stdev": "7" }, { "name": "Date", "index": "11", "type": "string", "distinct": "425", "missing": "0" }, { "name": "Daily_tests", "index": "12", "type": "numeric", "distinct": "17747", "missing": "7895", "min": "-239172", "max": "2945871", "mean": "39441", "stdev": "150185" }, { "name": "Cases", "index": "13", "type": "numeric", "distinct": "26843", "missing": "254", "min": "1", "max": "28605669", "mean": "287903", "stdev": "1405243" }, { "name": "Deaths", "index": "14", "type": "numeric", "distinct": "10404", "missing": "3610", "min": "1", "max": "513091", "mean": "8091", "stdev": "29549" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }