{ "data_id": "42207", "name": "particulate-matter-ukair-2017", "exact_name": "particulate-matter-ukair-2017", "version": 2, "version_label": null, "description": "Hourly particulate matter air pollution data of Great Britain for the year 2017, provided by Ricardo Energy and Environment on behalf of the UK Department for Environment, Food and Rural Affairs (DEFRA) and the Devolved Administrations on [https:\/\/uk-air.defra.gov.uk\/]. The data was scraped from the UK AIR homepage via the R-package 'rdefra' [Vitolo, C., Russell, A., & Tucker, A. (2016, August). Rdefra: interact with the UK AIR pollution database from DEFRA. The Journal of Open Source Software, 1(4). doi:10.21105\/joss.00051] on 09.11.2018. The data was published by DEFRA under the Open Government Licence (OGL) [http:\/\/www.nationalarchives.gov.uk\/doc\/open-government-licence\/version\/2\/]. For a description of all variables, check out the UK AIR homepage. The variable 'PM.sub.10..sub..particulate.matter..Hourly.measured.' was chosen as the target. The dataset also contains another measure of particulate matter 'PM.sub.2.5..sub..particulate.matter..Hourly.measured.' (ignored by default) which could be used as the target instead. The string variable 'datetime' (ignored by default) could be used to construct additional date\/time features. 
In this version of the dataset, the features 'Longitude' and 'Latitude' were removed to increase the importance of the categorical features 'Zone' and 'Site.Name'.", "format": "ARFF", "uploader": "Florian Pargent", "uploader_id": 1475, "visibility": "public", "creator": null, "contributor": null, "date": "2019-12-04 19:53:29", "update_comment": null, "last_update": "2019-12-04 19:53:29", "licence": "Open Government Licence (OGL)", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/21770082\/file1f931adec58f.arff", "default_target_attribute": "PM.sub.10..sub..particulate.matter..Hourly.measured.", "row_id_attribute": null, "ignore_attribute": "\"datetime,PM.sub.2.5..sub..particulate.matter..Hourly.measured.\"", "runs": 0, "suggest": { "input": [ "particulate-matter-ukair-2017", "Hourly particulate matter air pollution data of Great Britain for the year 2017, provided by Ricardo Energy and Environment on behalf of the UK Department for Environment, Food and Rural Affairs (DEFRA) and the Devolved Administrations on [https:\/\/uk-air.defra.gov.uk\/]. The data was scraped from the UK AIR homepage via the R-package 'rdefra' [Vitolo, C., Russell, A., & Tucker, A. (2016, August). Rdefra: interact with the UK AIR pollution database from DEFRA. 
The Journal of Open Source Software, 1 " ], "weight": 5 }, "qualities": { "NumberOfInstances": 394299, "NumberOfFeatures": 10, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 4, "NumberOfSymbolicFeatures": 5, "ClassEntropy": null, "MeanNoiseToSignalRatio": null, "Quartile2AttributeEntropy": null, "Dimensionality": 2.5361464269501065e-5, "MeanNominalAttDistinctValues": 21.2, "Quartile2KurtosisOfNumericAtts": 35.32576030193881, "EquivalentNumberOfAtts": null, "MeanSkewnessOfNumericAtts": 2.6530486253644163, "Quartile2MeansOfNumericAtts": 13.203000956127369, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 17.696196333568004, "Quartile2MutualInformation": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2SkewnessOfNumericAtts": 3.213288436131056, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -1.2105328598670273, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 10.547214841337752, "MaxKurtosisOfNumericAtts": 67.72556916675173, "MinMeansOfNumericAtts": 9.664257292054678, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "MaxMeansOfNumericAtts": 45.31677229714371, "MinMutualInformation": null, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 65.82527075906305, "MaxMutualInformation": null, "MinNominalAttDistinctValues": 4, "PercentageOfNumericFeatures": 40, "Quartile3MeansOfNumericAtts": 37.71435392175388, "MaxNominalAttDistinctValues": 53, "MinSkewnessOfNumericAtts": 0.0009682308095295899, "PercentageOfSymbolicFeatures": 50, "Quartile3MutualInformation": null, "MaxSkewnessOfNumericAtts": 4.184649398386025, "MinStdDevOfNumericAtts": 6.937696820773576, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 4.083394015357151, "MaxStdDevOfNumericAtts": 42.75265883082294, "MinorityClassPercentage": null, "Quartile1KurtosisOfNumericAtts": 1.7238866220698774, 
"Quartile3StdDevOfNumericAtts": 34.99808963058072, "MeanAttributeEntropy": null, "MinorityClassSize": null, "Quartile1MeansOfNumericAtts": 10.122918748208594, "StdvNominalAttDistinctValues": 20.437710243566915, "MeanKurtosisOfNumericAtts": 34.291639227690574, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": null, "MeanMeansOfNumericAtts": 20.34675787536328, "Quartile1SkewnessOfNumericAtts": 0.6624634246050428, "AutoCorrelation": -1.997437364632827, "MeanMutualInformation": null, "Quartile1StdDevOfNumericAtts": 7.543284528785542 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Transportation" } ], "features": [ { "name": "PM.sub.10..sub..particulate.matter..Hourly.measured.", "index": "8", "type": "numeric", "distinct": "21599", "missing": "0", "target": "1", "min": "-4", "max": "651", "mean": "15", "stdev": "12" }, { "name": "datetime", "index": "0", "type": "string", "distinct": "8760", "missing": "0" }, { "name": "Hour", "index": "1", "type": "numeric", "distinct": "24", "missing": "0", "min": "0", "max": "23", "mean": "11", "stdev": "7" }, { "name": "Month", "index": "2", "type": "nominal", "distinct": "12", "missing": "0", "distr": [] }, { "name": "DayofWeek", "index": "3", "type": "nominal", "distinct": "7", "missing": "0", "distr": [] }, { "name": "Site.Name", "index": "4", "type": "nominal", "distinct": "53", "missing": "0", "distr": [] }, { "name": "Environment.Type", "index": "5", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "Zone", "index": "6", "type": "nominal", "distinct": "30", "missing": "0", "distr": [] }, { "name": "Altitude..m.", "index": "7", "type": "numeric", "distinct": "41", "missing": "0", "min": "4", "max": "260", "mean": "45", "stdev": "43" }, { "name": "PM.sub.2.5..sub..particulate.matter..Hourly.measured.", "index": "9", "type": "numeric", "distinct": "16605", "missing": "0", "min": "-5", "max": "472", "mean": "10", "stdev": "9" } ], "nr_of_issues": 
0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }