{ "data_id": "45551", "name": "ATLAS-Higgs-Boson-Machine-Learning-Challenge-2014", "exact_name": "ATLAS-Higgs-Boson-Machine-Learning-Challenge-2014", "version": 3, "version_label": null, "description": "This is the dataset from the Kaggle Higgs Boson Machine Learning Challenge 2014. The data was downloaded from the [CERN website](http:\/\/opendata.cern.ch\/record\/328), which also hosts the documentation of the data. \nFurther information about the challenge can be found on [Kaggle](https:\/\/www.kaggle.com\/competitions\/higgs-boson\/), [the challenge website](https:\/\/higgsml.ijclab.in2p3.fr), and the [PMLR competition proceedings](http:\/\/proceedings.mlr.press\/v42\/). \n**Notes:** \n* This version encodes -999 as NaN. \n* This version only contains the data used by the Kaggle competition (first 800k samples)", "format": "arff", "uploader": "Matthias Feurer", "uploader_id": 86, "visibility": "public", "creator": null, "contributor": null, "date": "2023-06-04 17:12:47", "update_comment": null, "last_update": "2023-06-04 17:12:47", "licence": "CC0", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22116519\/dataset", "default_target_attribute": "Label", "row_id_attribute": "EventId", "ignore_attribute": "\"Weight\",\"KaggleSet\",\"KaggleWeight\"", "runs": 0, "suggest": { "input": [ "ATLAS-Higgs-Boson-Machine-Learning-Challenge-2014", "This is the dataset from the Kaggle Higgs Boson Machine Learning Challenge 2014. The data was downloaded from the [CERN website](http:\/\/opendata.cern.ch\/record\/328), which also hosts the documentation of the data. Further information about the challenge can be found on [Kaggle](https:\/\/www.kaggle.com\/competitions\/higgs-boson\/), [the challenge website](https:\/\/higgsml.ijclab.in2p3.fr), and the [PMLR competition proceedings](http:\/\/proceedings.mlr.press\/v42\/). * This version encodes -999 as NaN. 
" ], "weight": 5 }, "qualities": { "NumberOfInstances": 800000, "NumberOfFeatures": 31, "NumberOfClasses": 2, "NumberOfMissingValues": 5053446, "NumberOfInstancesWithMissingValues": 581423, "NumberOfNumericFeatures": 30, "NumberOfSymbolicFeatures": 1, "PercentageOfInstancesWithMissingValues": 72.677875, "AutoCorrelation": 0.5511756889696112, "PercentageOfMissingValues": 20.376798387096773, "Dimensionality": 3.875e-5, "PercentageOfNumericFeatures": 96.7741935483871, "MajorityClassPercentage": 65.828125, "PercentageOfSymbolicFeatures": 3.225806451612903, "MajorityClassSize": 526625, "MinorityClassPercentage": 34.171875, "MinorityClassSize": 273375, "NumberOfBinaryFeatures": 1, "PercentageOfBinaryFeatures": 3.225806451612903 }, "tags": [ { "uploader": "38960", "tag": "Chemistry" }, { "uploader": "38960", "tag": "Life Science" } ], "features": [ { "name": "Label", "index": "32", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "b", "s" ], [ [ "526625", "0" ], [ "0", "273375" ] ] ] }, { "name": "EventId", "index": "0", "type": "numeric", "distinct": "800000", "missing": "0", "identifier": "1", "min": "100000", "max": "899999", "mean": "500000", "stdev": "230940" }, { "name": "DER_mass_MMC", "index": "1", "type": "numeric", "distinct": "176176", "missing": "121936", "min": "9", "max": "1949", "mean": "122", "stdev": "57" }, { "name": "DER_mass_transverse_met_lep", "index": "2", "type": "numeric", "distinct": "131451", "missing": "0", "min": "0", "max": "969", "mean": "49", "stdev": "35" }, { "name": "DER_mass_vis", "index": "3", "type": "numeric", "distinct": "153364", "missing": "0", "min": "6", "max": "1349", "mean": "81", "stdev": "41" }, { "name": "DER_pt_h", "index": "4", "type": "numeric", "distinct": "189499", "missing": "0", "min": "0", "max": "2835", "mean": "58", "stdev": "63" }, { "name": "DER_deltaeta_jet_jet", "index": "5", "type": "numeric", "distinct": "7650", "missing": "567329", "min": "0", "max": "9", "mean": "2", "stdev": 
"2" }, { "name": "DER_mass_jet_jet", "index": "6", "type": "numeric", "distinct": "194275", "missing": "567329", "min": "14", "max": "4975", "mean": "372", "stdev": "398" }, { "name": "DER_prodeta_jet_jet", "index": "7", "type": "numeric", "distinct": "21795", "missing": "567329", "min": "-19", "max": "17", "mean": "-1", "stdev": "4" }, { "name": "DER_deltar_tau_lep", "index": "8", "type": "numeric", "distinct": "5014", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "DER_pt_tot", "index": "9", "type": "numeric", "distinct": "84176", "missing": "0", "min": "0", "max": "2835", "mean": "19", "stdev": "22" }, { "name": "DER_sum_pt", "index": "10", "type": "numeric", "distinct": "283219", "missing": "0", "min": "46", "max": "2079", "mean": "159", "stdev": "116" }, { "name": "DER_pt_ratio_lep_tau", "index": "11", "type": "numeric", "distinct": "7427", "missing": "0", "min": "0", "max": "32", "mean": "1", "stdev": "1" }, { "name": "DER_met_phi_centrality", "index": "12", "type": "numeric", "distinct": "2830", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "1" }, { "name": "DER_lep_eta_centrality", "index": "13", "type": "numeric", "distinct": "1001", "missing": "567329", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "PRI_tau_pt", "index": "14", "type": "numeric", "distinct": "85907", "missing": "0", "min": "20", "max": "764", "mean": "39", "stdev": "22" }, { "name": "PRI_tau_eta", "index": "15", "type": "numeric", "distinct": "4979", "missing": "0", "min": "-2", "max": "3", "mean": "0", "stdev": "1" }, { "name": "PRI_tau_phi", "index": "16", "type": "numeric", "distinct": "6286", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_lep_pt", "index": "17", "type": "numeric", "distinct": "88154", "missing": "0", "min": "26", "max": "701", "mean": "47", "stdev": "22" }, { "name": "PRI_lep_eta", "index": "18", "type": "numeric", "distinct": "5003", "missing": "0", "min": "-3", 
"max": "3", "mean": "0", "stdev": "1" }, { "name": "PRI_lep_phi", "index": "19", "type": "numeric", "distinct": "6286", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_met", "index": "20", "type": "numeric", "distinct": "125278", "missing": "0", "min": "0", "max": "2843", "mean": "42", "stdev": "33" }, { "name": "PRI_met_phi", "index": "21", "type": "numeric", "distinct": "6286", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_met_sumet", "index": "22", "type": "numeric", "distinct": "344910", "missing": "0", "min": "14", "max": "2190", "mean": "210", "stdev": "127" }, { "name": "PRI_jet_num", "index": "23", "type": "numeric", "distinct": "4", "missing": "0", "min": "0", "max": "3", "mean": "1", "stdev": "1" }, { "name": "PRI_jet_leading_pt", "index": "24", "type": "numeric", "distinct": "150725", "missing": "320069", "min": "30", "max": "1163", "mean": "85", "stdev": "61" }, { "name": "PRI_jet_leading_eta", "index": "25", "type": "numeric", "distinct": "8897", "missing": "320069", "min": "-5", "max": "5", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_leading_phi", "index": "26", "type": "numeric", "distinct": "6286", "missing": "320069", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_subleading_pt", "index": "27", "type": "numeric", "distinct": "74366", "missing": "567329", "min": "30", "max": "818", "mean": "58", "stdev": "32" }, { "name": "PRI_jet_subleading_eta", "index": "28", "type": "numeric", "distinct": "8929", "missing": "567329", "min": "-5", "max": "5", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_subleading_phi", "index": "29", "type": "numeric", "distinct": "6286", "missing": "567329", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_all_pt", "index": "30", "type": "numeric", "distinct": "205282", "missing": "0", "min": "0", "max": "1860", "mean": "73", "stdev": "98" }, { "name": "Weight", "index": "31", "type": "numeric", 
"distinct": "332024", "missing": "0", "ignore": "1", "min": "0", "max": "2", "mean": "1", "stdev": "1" }, { "name": "KaggleSet", "index": "33", "type": "nominal", "distinct": "3", "missing": "0", "ignore": "1", "distr": [ [ "b", "t", "v" ], [ [ "65975", "34025" ], [ "164333", "85667" ], [ "296317", "153683" ] ] ] }, { "name": "KaggleWeight", "index": "34", "type": "numeric", "distinct": "332900", "missing": "0", "ignore": "1", "min": "0", "max": "19", "mean": "2", "stdev": "2" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }