{ "data_id": "43495", "name": "COVID-19-Mexico-Clean--Order-by-States", "exact_name": "COVID-19-Mexico-Clean--Order-by-States", "version": 1, "version_label": "v1.0", "description": "Context\nThe data obtained from the Mexico's General Direction of Epidemiology contains multiple information on the current pandemic situation. However, these data are saturated with features that may not be very useful in a predictive analysis. \nDue to this I decided to clean and format the original data and generate a dataset that groups confirmed, dead, recovered and active cases by State, Municipality and Date.\nThis is very useful if you want to generate geographically specific models\nContent\nThe data set contains the covid cases columns (positive, dead, recovered and active) that are counted by state and municipality.\nI.e\n\n\n\nState\nMunicipality\nDate\nDeaths\nConfirmed\nrecovered\nActive\n\n\n\n\nCiudad de Mexico\nIztapalapa\n2020-07-18\n1\n42\n0\n41\n\n\nCiudad de Mexico\nIztapalapa\n2020-07-19\n0\n14\n0\n14\n\n\nCiudad de Mexico\nIztapalapa\n2020-07-20\n0\n41\n0\n41\n\n\n\nWould you like to see the data cleaning notebook?\nYou can check it in my Github\nClassification criteria\n\nRecovered cases: If the patient is not dead and it has been more than 15 days then he is considered as recovered.\nActive cases: If the patient isn't recovered and isn't dead then the case is considered active\n\nTime lapse\nThe first documented case is on 2020-01-13. 
\n The dataset will be updated every day adding new cases\nAcknowledgements\nFor this project, the data are obtained from the official URL of the government of M\u00e9xico whose author is Direcci\u00f3n General de Epidemiolog\u00eda:\nCorona Virus Data: https:\/\/www.gob.mx\/salud\/documentos\/datos-abiertos-152127\nData Dictionary: https:\/\/www.gob.mx\/salud\/documentos\/datos-abiertos-152127\nDifferences in results\nAccording to the official results obtained from: https:\/\/coronavirus.gob.mx\/datos\/\n\nThe main difference between the official data and this dataset is in the recovered cases. This is because the Mexican government only considers outpatient cases when counting recovered cases. This dataset considers outpatient and inpatient cases when counting recovered people.\nThe second difference is some rows that contained nonsense information (I think this was a data collection error by the institution), these were eliminated.", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:27:58", "update_comment": null, "last_update": "2022-03-23 13:27:58", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102320\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "COVID-19-Mexico-Clean--Order-by-States", "Context The data obtained from the Mexico's General Direction of Epidemiology contains multiple information on the current pandemic situation. However, these data are saturated with features that may not be very useful in a predictive analysis. Due to this I decided to clean and format the original data and generate a dataset that groups confirmed, dead, recovered and active cases by State, Municipality and Date. 
This is very useful if you want to generate geographically specific models Content " ], "weight": 5 }, "qualities": { "NumberOfInstances": 92320, "NumberOfFeatures": 7, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 4, "NumberOfSymbolicFeatures": 0, "Dimensionality": 7.582322357019064e-5, "PercentageOfNumericFeatures": 57.14285714285714, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "State", "index": "0", "type": "string", "distinct": "32", "missing": "0" }, { "name": "Municipality", "index": "1", "type": "string", "distinct": "2057", "missing": "0" }, { "name": "Date", "index": "2", "type": "string", "distinct": "213", "missing": "0" }, { "name": "Deaths", "index": "3", "type": "numeric", "distinct": "37", "missing": "0", "min": "0", "max": "42", "mean": "1", "stdev": "2" }, { "name": "Confirmed", "index": "4", "type": "numeric", "distinct": "228", "missing": "0", "min": "1", "max": "346", "mean": "7", "stdev": "17" }, { "name": "Recovered", "index": "5", "type": "numeric", "distinct": "213", "missing": "0", "min": "0", "max": "324", "mean": "6", "stdev": "15" }, { "name": "Active", "index": "6", "type": "numeric", "distinct": "94", "missing": "0", "min": "0", "max": "135", "mean": "0", "stdev": "3" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }