{ "data_id": "45550", "name": "ATLAS-Higgs-Boson-Machine-Learning-Challenge-2014", "exact_name": "ATLAS-Higgs-Boson-Machine-Learning-Challenge-2014", "version": 2, "version_label": null, "description": "This is the datasets from the Kaggle Higgs Boson Machine Learning Challenge 2014. The data was downloaded from the [CERN website](http:\/\/opendata.cern.ch\/record\/328), which also hosts the documentation of the data. \nFurther information about the challenge can be found on [Kaggle](https:\/\/www.kaggle.com\/competitions\/higgs-boson\/), [the challenge website](https:\/\/higgsml.ijclab.in2p3.fr), and the [PMLR competition proceedings](http:\/\/proceedings.mlr.press\/v42\/). \n**Note:** This version encodes -999 as NaN.", "format": "arff", "uploader": "Matthias Feurer", "uploader_id": 86, "visibility": "public", "creator": null, "contributor": null, "date": "2023-06-04 16:47:14", "update_comment": null, "last_update": "2023-06-04 16:47:14", "licence": "CC0", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22116518\/dataset", "default_target_attribute": "Label", "row_id_attribute": "EventId", "ignore_attribute": "\"Weight\",\"KaggleSet\",\"KaggleWeight\"", "runs": 0, "suggest": { "input": [ "ATLAS-Higgs-Boson-Machine-Learning-Challenge-2014", "This is the datasets from the Kaggle Higgs Boson Machine Learning Challenge 2014. The data was downloaded from the [CERN website](http:\/\/opendata.cern.ch\/record\/328), which also hosts the documentation of the data. Further information about the challenge can be found on [Kaggle](https:\/\/www.kaggle.com\/competitions\/higgs-boson\/), [the challenge website](https:\/\/higgsml.ijclab.in2p3.fr), and the [PMLR competition proceedings](http:\/\/proceedings.mlr.press\/v42\/). " ], "weight": 5 }, "qualities": { "NumberOfInstances": 818238, "NumberOfFeatures": 31, "NumberOfClasses": 2, "NumberOfMissingValues": 5168486, "NumberOfInstancesWithMissingValues": 594664, "NumberOfNumericFeatures": 30, "NumberOfSymbolicFeatures": 1, "PercentageOfBinaryFeatures": 3.225806451612903, "PercentageOfInstancesWithMissingValues": 72.67616512555026, "AutoCorrelation": 0.5512803747569469, "PercentageOfMissingValues": 20.376144207273395, "Dimensionality": 3.788628736382324e-5, "PercentageOfNumericFeatures": 96.7741935483871, "MajorityClassPercentage": 65.83390162764378, "PercentageOfSymbolicFeatures": 3.225806451612903, "MajorityClassSize": 538678, "MinorityClassPercentage": 34.16609837235621, "MinorityClassSize": 279560, "NumberOfBinaryFeatures": 1 }, "tags": [ { "uploader": "38960", "tag": "Chemistry" }, { "uploader": "38960", "tag": "Life Science" } ], "features": [ { "name": "Label", "index": "32", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "b", "s" ], [ [ "538678", "0" ], [ "0", "279560" ] ] ] }, { "name": "EventId", "index": "0", "type": "numeric", "distinct": "818238", "missing": "0", "identifier": "1", "min": "100000", "max": "918237", "mean": "509119", "stdev": "236205" }, { "name": "DER_mass_MMC", "index": "1", "type": "numeric", "distinct": "177621", "missing": "124602", "min": "9", "max": "1949", "mean": "122", "stdev": "57" }, { "name": "DER_mass_transverse_met_lep", "index": "2", "type": "numeric", "distinct": "131888", "missing": "0", "min": "0", "max": "969", "mean": "49", "stdev": "35" }, { "name": "DER_mass_vis", "index": "3", "type": "numeric", "distinct": "154516", "missing": "0", "min": "6", "max": "1349", "mean": "81", "stdev": "41" }, { "name": "DER_pt_h", "index": "4", "type": "numeric", "distinct": "190925", "missing": "0", "min": "0", "max": "2835", "mean": "58", "stdev": "63" }, { "name": "DER_deltaeta_jet_jet", "index": "5", "type": "numeric", "distinct": "7664", "missing": "580253", "min": "0", "max": "9", "mean": "2", "stdev": "2" }, { "name": "DER_mass_jet_jet", "index": "6", "type": "numeric", "distinct": "197974", "missing": "580253", "min": "14", "max": "4975", "mean": "372", "stdev": "398" }, { "name": "DER_prodeta_jet_jet", "index": "7", "type": "numeric", "distinct": "21893", "missing": "580253", "min": "-19", "max": "18", "mean": "-1", "stdev": "4" }, { "name": "DER_deltar_tau_lep", "index": "8", "type": "numeric", "distinct": "5018", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "DER_pt_tot", "index": "9", "type": "numeric", "distinct": "84683", "missing": "0", "min": "0", "max": "2835", "mean": "19", "stdev": "22" }, { "name": "DER_sum_pt", "index": "10", "type": "numeric", "distinct": "285774", "missing": "0", "min": "46", "max": "2079", "mean": "159", "stdev": "116" }, { "name": "DER_pt_ratio_lep_tau", "index": "11", "type": "numeric", "distinct": "7464", "missing": "0", "min": "0", "max": "32", "mean": "1", "stdev": "1" }, { "name": "DER_met_phi_centrality", "index": "12", "type": "numeric", "distinct": "2830", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "1" }, { "name": "DER_lep_eta_centrality", "index": "13", "type": "numeric", "distinct": "1001", "missing": "580253", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "PRI_tau_pt", "index": "14", "type": "numeric", "distinct": "86459", "missing": "0", "min": "20", "max": "764", "mean": "39", "stdev": "22" }, { "name": "PRI_tau_eta", "index": "15", "type": "numeric", "distinct": "4979", "missing": "0", "min": "-2", "max": "3", "mean": "0", "stdev": "1" }, { "name": "PRI_tau_phi", "index": "16", "type": "numeric", "distinct": "6286", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_lep_pt", "index": "17", "type": "numeric", "distinct": "88725", "missing": "0", "min": "26", "max": "701", "mean": "47", "stdev": "22" }, { "name": "PRI_lep_eta", "index": "18", "type": "numeric", "distinct": "5003", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "1" }, { "name": "PRI_lep_phi", "index": "19", "type": "numeric", "distinct": "6286", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_met", "index": "20", "type": "numeric", "distinct": "126055", "missing": "0", "min": "0", "max": "2843", "mean": "42", "stdev": "32" }, { "name": "PRI_met_phi", "index": "21", "type": "numeric", "distinct": "6286", "missing": "0", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_met_sumet", "index": "22", "type": "numeric", "distinct": "348080", "missing": "0", "min": "14", "max": "2190", "mean": "210", "stdev": "127" }, { "name": "PRI_jet_num", "index": "23", "type": "numeric", "distinct": "4", "missing": "0", "min": "0", "max": "3", "mean": "1", "stdev": "1" }, { "name": "PRI_jet_leading_pt", "index": "24", "type": "numeric", "distinct": "152061", "missing": "327371", "min": "30", "max": "1163", "mean": "85", "stdev": "61" }, { "name": "PRI_jet_leading_eta", "index": "25", "type": "numeric", "distinct": "8902", "missing": "327371", "min": "-5", "max": "5", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_leading_phi", "index": "26", "type": "numeric", "distinct": "6286", "missing": "327371", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_subleading_pt", "index": "27", "type": "numeric", "distinct": "75078", "missing": "580253", "min": "30", "max": "818", "mean": "58", "stdev": "32" }, { "name": "PRI_jet_subleading_eta", "index": "28", "type": "numeric", "distinct": "8932", "missing": "580253", "min": "-5", "max": "5", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_subleading_phi", "index": "29", "type": "numeric", "distinct": "6286", "missing": "580253", "min": "-3", "max": "3", "mean": "0", "stdev": "2" }, { "name": "PRI_jet_all_pt", "index": "30", "type": "numeric", "distinct": "207428", "missing": "0", "min": "0", "max": "1860", "mean": "73", "stdev": "98" }, { "name": "Weight", "index": "31", "type": "numeric", "distinct": "339549", "missing": "0", "ignore": "1", "min": "0", "max": "2", "mean": "1", "stdev": "1" }, { "name": "KaggleSet", "index": "33", "type": "nominal", "distinct": "4", "missing": "0", "ignore": "1", "distr": [ [ "b", "t", "u", "v" ], [ [ "65975", "34025" ], [ "164333", "85667" ], [ "12053", "6185" ], [ "296317", "153683" ] ] ] }, { "name": "KaggleWeight", "index": "34", "type": "numeric", "distinct": "340497", "missing": "0", "ignore": "1", "min": "0", "max": "107", "mean": "2", "stdev": "5" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }