{ "data_id": "42206", "name": "porto-seguro", "exact_name": "porto-seguro", "version": 2, "version_label": null, "description": "Training dataset of the 'Porto Seguros Safe Driver Prediction' Kaggle challenge [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction]. The goal was to predict whether a driver will file an insurance claim next year. The official rules of the challenge explicitly state that the data may be used for 'academic research and education, and other non-commercial purposes' [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/rules]. For a description of all variables check out the Kaggle dataset repository [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/data]. It states that numeric features with integer values that do not contain 'bin' or 'cat' in their variable names are in fact ordinal features which could be treated as ordinal factors in R. For further information on effective preprocessing and feature engineering check out the 'Kernels' section of the Kaggle challenge website [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/kernels]. For this version we removed all 'calc' variables, as the Kaggle forum indicates that they do not carry much information.", "format": "ARFF", "uploader": "Florian Pargent", "uploader_id": 1475, "visibility": "public", "creator": null, "contributor": null, "date": "2019-12-04 17:36:45", "update_comment": null, "last_update": "2019-12-04 17:36:45", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/21770028\/file1f931f47795.arff", "default_target_attribute": "target", "row_id_attribute": "id", "ignore_attribute": null, "runs": 2, "suggest": { "input": [ "porto-seguro", "Training dataset of the 'Porto Seguros Safe Driver Prediction' Kaggle challenge [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction]. The goal was to predict whether a driver will file an insurance claim next year. The official rules of the challenge explicitly state that the data may be used for 'academic research and education, and other non-commercial purposes' [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/rules]. For a description of all variables check out the Kagg " ], "weight": 5 }, "qualities": { "NumberOfInstances": 595212, "NumberOfFeatures": 38, "NumberOfClasses": 2, "NumberOfMissingValues": 846458, "NumberOfInstancesWithMissingValues": 470281, "NumberOfNumericFeatures": 12, "NumberOfSymbolicFeatures": 26, "EquivalentNumberOfAtts": 492.0279281329562, "MeanSkewnessOfNumericAtts": 1.2578579351609267, "Quartile2MeansOfNumericAtts": 0.8536559975174327, "MajorityClassPercentage": 96.3552482140817, "MeanStdDevOfNumericAtts": 0.9405797078535328, "Quartile2MutualInformation": 0.00044144358213, "MajorityClassSize": 573518, "MinAttributeEntropy": 0.004785679772181064, "Quartile2SkewnessOfNumericAtts": 0.7997781757617288, "MaxAttributeEntropy": 6.0139161982134866, "MinKurtosisOfNumericAtts": -0.8869510338079243, "PercentageOfBinaryFeatures": 47.368421052631575, "Quartile2StdDevOfNumericAtts": 0.374838561156488, "MaxKurtosisOfNumericAtts": 180.69841212538645, "MinMeansOfNumericAtts": 0.012451025852973219, "PercentageOfInstancesWithMissingValues": 79.01067182785293, "Quartile3AttributeEntropy": 0.9935800146655192, "MaxMeansOfNumericAtts": 7.299921708567651, "MinMutualInformation": 7.9797522e-7, "PercentageOfMissingValues": 3.7423994352122922, "Quartile3KurtosisOfNumericAtts": 5.31271417118511, "MaxMutualInformation": 0.00174260455467, "MinNominalAttDistinctValues": 2, "PercentageOfNumericFeatures": 31.57894736842105, "Quartile3MeansOfNumericAtts": 2.885949532852075, "MaxNominalAttDistinctValues": 104, "MinSkewnessOfNumericAtts": -2.216291970374578, "PercentageOfSymbolicFeatures": 68.42105263157895, "Quartile3MutualInformation": 0.000830063931675, "MaxSkewnessOfNumericAtts": 
12.207636136041355, "MinStdDevOfNumericAtts": 0.0456099978526511, "Quartile1AttributeEntropy": 0.4504410775602591, "Quartile3SkewnessOfNumericAtts": 1.2335106422515647, "MaxStdDevOfNumericAtts": 3.5460421006584197, "MinorityClassPercentage": 3.6447517859182947, "Quartile1KurtosisOfNumericAtts": -0.5422588289584691, "Quartile3StdDevOfNumericAtts": 1.695965542727014, "MeanAttributeEntropy": 1.0222552334713884, "MinorityClassSize": 21694, "Quartile1MeansOfNumericAtts": 0.3947565537723783, "StdvNominalAttDistinctValues": 20.031859240105113, "MeanKurtosisOfNumericAtts": 16.601201984240465, "NumberOfBinaryFeatures": 18, "Quartile1MutualInformation": 3.2607179605e-5, "MeanMeansOfNumericAtts": 1.8800161695374769, "Quartile1SkewnessOfNumericAtts": -0.5510415988053787, "AutoCorrelation": 0.9298568070818584, "MeanMutualInformation": 0.000458835904472, "Quartile1StdDevOfNumericAtts": 0.15180575155743106, "ClassEntropy": 0.2257600794303692, "MeanNoiseToSignalRatio": 2226.932085323045, "Quartile2AttributeEntropy": 0.6930268228629258, "Dimensionality": 6.384279886830239e-5, "MeanNominalAttDistinctValues": 7.653846153846151, "Quartile2KurtosisOfNumericAtts": 1.0173912400911282 }, "tags": [ { "uploader": "38960", "tag": "Demographics" }, { "uploader": "38960", "tag": "Geography" } ], "features": [ { "name": "target", "index": "1", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "0", "1" ], [ [ "573518", "0" ], [ "0", "21694" ] ] ] }, { "name": "id", "index": "0", "type": "numeric", "distinct": "595212", "missing": "0", "identifier": "1", "min": "7", "max": "1488027", "mean": "743804", "stdev": "429368" }, { "name": "ps_ind_01", "index": "2", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "7", "mean": "2", "stdev": "2" }, { "name": "ps_ind_02_cat", "index": "3", "type": "nominal", "distinct": "4", "missing": "216", "distr": [ [ "1", "2", "3", "4" ], [ [ "416431", "15428" ], [ "118860", "4713" ], [ "27137", "1049" ], [ "10914", 
"464" ] ] ] }, { "name": "ps_ind_03", "index": "4", "type": "numeric", "distinct": "12", "missing": "0", "min": "0", "max": "11", "mean": "4", "stdev": "3" }, { "name": "ps_ind_04_cat", "index": "5", "type": "nominal", "distinct": "2", "missing": "83", "distr": [ [ "0", "1" ], [ [ "334894", "12071" ], [ "238574", "9590" ] ] ] }, { "name": "ps_ind_05_cat", "index": "6", "type": "nominal", "distinct": "7", "missing": "5809", "distr": [ [ "0", "1", "2", "3", "4", "5", "6" ], [ [ "510128", "17881" ], [ "7924", "398" ], [ "3868", "316" ], [ "7879", "354" ], [ "17389", "955" ], [ "1569", "80" ], [ "19436", "1226" ] ] ] }, { "name": "ps_ind_06_bin", "index": "7", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "345846", "15006" ], [ "227672", "6688" ] ] ] }, { "name": "ps_ind_07_bin", "index": "8", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "427773", "14450" ], [ "145745", "7244" ] ] ] }, { "name": "ps_ind_08_bin", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "480049", "17595" ], [ "93469", "4099" ] ] ] }, { "name": "ps_ind_09_bin", "index": "10", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "466886", "18031" ], [ "106632", "3663" ] ] ] }, { "name": "ps_ind_10_bin", "index": "11", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "573308", "21682" ], [ "210", "12" ] ] ] }, { "name": "ps_ind_11_bin", "index": "12", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "572557", "21648" ], [ "961", "46" ] ] ] }, { "name": "ps_ind_12_bin", "index": "13", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "568189", "21405" ], [ "5329", "289" ] ] ] }, { "name": "ps_ind_13_bin", "index": "14", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "572983", "21665" ], [ "535", "29" ] ] ] }, { "name": 
"ps_ind_14", "index": "15", "type": "numeric", "distinct": "5", "missing": "0", "min": "0", "max": "4", "mean": "0", "stdev": "0" }, { "name": "ps_ind_15", "index": "16", "type": "numeric", "distinct": "14", "missing": "0", "min": "0", "max": "13", "mean": "7", "stdev": "4" }, { "name": "ps_ind_16_bin", "index": "17", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "193057", "8825" ], [ "380461", "12869" ] ] ] }, { "name": "ps_ind_17_bin", "index": "18", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "505424", "17719" ], [ "68094", "3975" ] ] ] }, { "name": "ps_ind_18_bin", "index": "19", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "485697", "18182" ], [ "87821", "3512" ] ] ] }, { "name": "ps_reg_01", "index": "20", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "ps_reg_02", "index": "21", "type": "numeric", "distinct": "19", "missing": "0", "min": "0", "max": "2", "mean": "0", "stdev": "0" }, { "name": "ps_reg_03", "index": "22", "type": "numeric", "distinct": "5012", "missing": "107772", "min": "0", "max": "4", "mean": "1", "stdev": "0" }, { "name": "ps_car_01_cat", "index": "23", "type": "nominal", "distinct": "12", "missing": "107", "distr": [ [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11" ], [ [ "5645", "259" ], [ "1303", "64" ], [ "2052", "92" ], [ "6408", "250" ], [ "25149", "1025" ], [ "17430", "712" ], [ "60536", "1857" ], [ "174090", "5157" ], [ "14438", "655" ], [ "19217", "1106" ], [ "48235", "1852" ], [ "198942", "8631" ] ] ] }, { "name": "ps_car_02_cat", "index": "24", "type": "nominal", "distinct": "2", "missing": "5", "distr": [ [ "0", "1" ], [ [ "96206", "5011" ], [ "477307", "16683" ] ] ] }, { "name": "ps_car_03_cat", "index": "25", "type": "nominal", "distinct": "2", "missing": "411231", "distr": [ [ "0", "1" ], [ [ "70375", "2897" ], [ "105362", "5347" ] ] ] 
}, { "name": "ps_car_04_cat", "index": "26", "type": "nominal", "distinct": "10", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" ], [ [ "479970", "16611" ], [ "30547", "1568" ], [ "22648", "1122" ], [ "607", "33" ], [ "225", "5" ], [ "506", "39" ], [ "1468", "92" ], [ "126", "13" ], [ "19565", "1033" ], [ "17856", "1178" ] ] ] }, { "name": "ps_car_05_cat", "index": "27", "type": "nominal", "distinct": "2", "missing": "266551", "distr": [ [ "0", "1" ], [ [ "149737", "6257" ], [ "165682", "6985" ] ] ] }, { "name": "ps_car_06_cat", "index": "28", "type": "nominal", "distinct": "18", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17" ], [ [ "106735", "3685" ], [ "114339", "4047" ], [ "1501", "106" ], [ "11539", "458" ], [ "30082", "1054" ], [ "1307", "94" ], [ "20180", "771" ], [ "15531", "627" ], [ "1314", "98" ], [ "16648", "969" ], [ "31990", "1476" ], [ "127355", "4172" ], [ "2270", "116" ], [ "5890", "356" ], [ "57221", "2032" ], [ "20621", "1111" ], [ "4375", "207" ], [ "4620", "315" ] ] ] }, { "name": "ps_car_07_cat", "index": "29", "type": "nominal", "distinct": "2", "missing": "11489", "distr": [ [ "0", "1" ], [ [ "29010", "1565" ], [ "533917", "19231" ] ] ] }, { "name": "ps_car_08_cat", "index": "30", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "95457", "4491" ], [ "478061", "17203" ] ] ] }, { "name": "ps_car_09_cat", "index": "31", "type": "nominal", "distinct": "5", "missing": "569", "distr": [ [ "0", "1", "2", "3", "4" ], [ [ "188032", "6486" ], [ "27424", "1656" ], [ "340644", "12838" ], [ "14238", "518" ], [ "2663", "144" ] ] ] }, { "name": "ps_car_10_cat", "index": "32", "type": "nominal", "distinct": "3", "missing": "0", "distr": [ [ "0", "1", "2" ], [ [ "4690", "167" ], [ "568659", "21520" ], [ "169", "7" ] ] ] }, { "name": "ps_car_11_cat", "index": "33", "type": "nominal", "distinct": "104", "missing": "0", 
"distr": [ [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", "101", "102", "103", "104" ], [ [ "3196", "135" ], [ "2456", "97" ], [ "3022", "163" ], [ "1499", "97" ], [ "11988", "525" ], [ "2388", "92" ], [ "5509", "144" ], [ "2388", "93" ], [ "2149", "62" ], [ "8508", "224" ], [ "6530", "186" ], [ "6966", "280" ], [ "3144", "168" ], [ "2566", "108" ], [ "1814", "66" ], [ "6825", "182" ], [ "3747", "179" ], [ "1151", "82" ], [ "4975", "122" ], [ "1765", "83" ], [ "2282", "142" ], [ "8096", "275" ], [ "1604", "64" ], [ "2261", "92" ], [ "4502", "195" ], [ "3333", "117" ], [ "5720", "210" ], [ "12006", "532" ], [ "4305", "162" ], [ "2251", "71" ], [ "5081", "204" ], [ "12272", "306" ], [ "1693", "97" ], [ "4585", "174" ], [ "1417", "64" ], [ "2842", "115" ], [ "4859", "149" ], [ "8960", "286" ], [ "7483", "193" ], [ "4703", "167" ], [ "2943", "239" ], [ "4802", "142" ], [ "3250", "61" ], [ "5479", "149" ], [ "1630", "76" ], [ "5140", "215" ], [ "1952", "83" ], [ "5831", "179" ], [ "5294", "176" ], [ "1745", "80" ], [ "5784", "224" ], [ "3416", "115" ], [ "4519", "138" ], [ "1553", "69" ], [ "3994", "235" ], [ "1202", "70" ], [ "3705", "90" ], [ "1375", "89" ], [ "1706", "64" ], [ "7742", "250" ], [ "2689", "151" ], [ "2810", "84" ], [ "1036", "67" ], [ "21638", "640" ], [ "10571", "334" ], [ "3017", "82" ], [ "6873", "188" ], [ "8100", "288" ], [ "1693", "92" ], [ "7197", "282" ], [ "2230", "127" ], [ "2713", "164" ], [ "1791", "67" 
], [ "4823", "152" ], [ "1572", "106" ], [ "2517", "89" ], [ "2325", "80" ], [ "7050", "284" ], [ "1986", "93" ], [ "5484", "226" ], [ "1374", "59" ], [ "10195", "275" ], [ "9138", "372" ], [ "4747", "146" ], [ "6112", "201" ], [ "3595", "124" ], [ "16565", "541" ], [ "4390", "133" ], [ "4795", "235" ], [ "5050", "239" ], [ "1765", "73" ], [ "6270", "236" ], [ "2759", "172" ], [ "3186", "165" ], [ "3460", "98" ], [ "2006", "67" ], [ "1396", "88" ], [ "2762", "88" ], [ "11765", "277" ], [ "4212", "233" ], [ "7058", "281" ], [ "2032", "71" ], [ "23615", "647" ], [ "81253", "3830" ] ] ] }, { "name": "ps_car_11", "index": "34", "type": "numeric", "distinct": "4", "missing": "5", "min": "0", "max": "3", "mean": "2", "stdev": "1" }, { "name": "ps_car_12", "index": "35", "type": "numeric", "distinct": "183", "missing": "1", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_car_13", "index": "36", "type": "numeric", "distinct": "69456", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "0" }, { "name": "ps_car_14", "index": "37", "type": "numeric", "distinct": "849", "missing": "42620", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_car_15", "index": "38", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "4", "mean": "3", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 12, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 12 }