{ "data_id": "42742", "name": "porto-seguro", "exact_name": "porto-seguro", "version": 3, "version_label": null, "description": "Training dataset of the 'Porto Seguros Safe Driver Prediction' Kaggle challenge [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction]. The goal was to predict whether a driver will file an insurance claim next year. The official rules of the challenge explicitly state that the data may be used for 'academic research and education, and other non-commercial purposes' [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/rules]. For a description of all variables check out the Kaggle dataset repository [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/data]. It states that numeric features with integer values that do not contain 'bin' or 'cat' in their variable names are in fact ordinal features which could be treated as ordinal factors in R. For further information on effective preprocessing and feature engineering check out the 'Kernels' section of the Kaggle challenge website [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/kernels]. Note that many Kagglers removed all 'calc' variables as they do not seem to carry much information.", "format": "arff", "uploader": "Marcos de Paula Bueno", "uploader_id": 11601, "visibility": "public", "creator": null, "contributor": null, "date": "2020-12-03 01:10:51", "update_comment": null, "last_update": "2020-12-03 01:10:51", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22044816\/dataset", "kaggle_url": null, "default_target_attribute": "target", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "porto-seguro",
"Training dataset of the 'Porto Seguros Safe Driver Prediction' Kaggle challenge [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction]. The goal was to predict whether a driver will file an insurance claim next year. The official rules of the challenge explicitly state that the data may be used for 'academic research and education, and other non-commercial purposes' [https:\/\/www.kaggle.com\/c\/porto-seguro-safe-driver-prediction\/rules]. For a description of all variables check out the Kagg " ], "weight": 5 }, "qualities": { "NumberOfInstances": 595212, "NumberOfFeatures": 58, "NumberOfClasses": 2, "NumberOfMissingValues": 846458, "NumberOfInstancesWithMissingValues": 470281, "NumberOfNumericFeatures": 26, "NumberOfSymbolicFeatures": 32, "PercentageOfBinaryFeatures": 41.37931034482759, "PercentageOfInstancesWithMissingValues": 79.01067182785293, "PercentageOfMissingValues": 2.451916871345985, "AutoCorrelation": 0.9298568070818584, "PercentageOfNumericFeatures": 44.827586206896555, "Dimensionality": 9.744427195688259e-5, "PercentageOfSymbolicFeatures": 55.172413793103445, "MajorityClassPercentage": 96.3552482140817, "MajorityClassSize": 573518, "MinorityClassPercentage": 3.6447517859182947, "MinorityClassSize": 21694, "NumberOfBinaryFeatures": 24 }, "tags": [ { "uploader": "38960", "tag": "Data Science" }, { "uploader": "38960", "tag": "Research" }, { "uploader": "869", "tag": "study_271" } ], "features": [ { "name": "target", "index": "0", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "0", "1" ], [ [ "573518", "0" ], [ "0", "21694" ] ] ] }, { "name": "ps_ind_01", "index": "1", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "7", "mean": "2", "stdev": "2" }, { "name": "ps_ind_02_cat", "index": "2", "type": "nominal", "distinct": "4", "missing": "216", "distr": [ [ "1", "2", "3", "4" ], [ [ "416431", "15428" ], [ "118860", "4713" ], [ "27137", "1049" ], [ "10914", "464" ] ] ] }, { "name": "ps_ind_03", "index": "3", "type": "numeric", "distinct": "12", "missing": "0", "min": "0", "max": "11", "mean": "4", "stdev": "3" }, { "name": "ps_ind_04_cat", "index": "4", "type": "nominal", "distinct": "2", "missing": "83", "distr": 
[ [ "0", "1" ], [ [ "334894", "12071" ], [ "238574", "9590" ] ] ] }, { "name": "ps_ind_05_cat", "index": "5", "type": "nominal", "distinct": "7", "missing": "5809", "distr": [ [ "0", "1", "2", "3", "4", "5", "6" ], [ [ "510128", "17881" ], [ "7924", "398" ], [ "3868", "316" ], [ "7879", "354" ], [ "17389", "955" ], [ "1569", "80" ], [ "19436", "1226" ] ] ] }, { "name": "ps_ind_06_bin", "index": "6", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "345846", "15006" ], [ "227672", "6688" ] ] ] }, { "name": "ps_ind_07_bin", "index": "7", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "427773", "14450" ], [ "145745", "7244" ] ] ] }, { "name": "ps_ind_08_bin", "index": "8", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "480049", "17595" ], [ "93469", "4099" ] ] ] }, { "name": "ps_ind_09_bin", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "466886", "18031" ], [ "106632", "3663" ] ] ] }, { "name": "ps_ind_10_bin", "index": "10", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "573308", "21682" ], [ "210", "12" ] ] ] }, { "name": "ps_ind_11_bin", "index": "11", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "572557", "21648" ], [ "961", "46" ] ] ] }, { "name": "ps_ind_12_bin", "index": "12", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "568189", "21405" ], [ "5329", "289" ] ] ] }, { "name": "ps_ind_13_bin", "index": "13", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "572983", "21665" ], [ "535", "29" ] ] ] }, { "name": "ps_ind_14", "index": "14", "type": "numeric", "distinct": "5", "missing": "0", "min": "0", "max": "4", "mean": "0", "stdev": "0" }, { "name": "ps_ind_15", "index": "15", "type": "numeric", "distinct": "14", "missing": "0", "min": "0", "max": "13", "mean": "7", "stdev": "4" 
}, { "name": "ps_ind_16_bin", "index": "16", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "193057", "8825" ], [ "380461", "12869" ] ] ] }, { "name": "ps_ind_17_bin", "index": "17", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "505424", "17719" ], [ "68094", "3975" ] ] ] }, { "name": "ps_ind_18_bin", "index": "18", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "485697", "18182" ], [ "87821", "3512" ] ] ] }, { "name": "ps_reg_01", "index": "19", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "ps_reg_02", "index": "20", "type": "numeric", "distinct": "19", "missing": "0", "min": "0", "max": "2", "mean": "0", "stdev": "0" }, { "name": "ps_reg_03", "index": "21", "type": "numeric", "distinct": "5012", "missing": "107772", "min": "0", "max": "4", "mean": "1", "stdev": "0" }, { "name": "ps_car_01_cat", "index": "22", "type": "nominal", "distinct": "12", "missing": "107", "distr": [ [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11" ], [ [ "5645", "259" ], [ "1303", "64" ], [ "2052", "92" ], [ "6408", "250" ], [ "25149", "1025" ], [ "17430", "712" ], [ "60536", "1857" ], [ "174090", "5157" ], [ "14438", "655" ], [ "19217", "1106" ], [ "48235", "1852" ], [ "198942", "8631" ] ] ] }, { "name": "ps_car_02_cat", "index": "23", "type": "nominal", "distinct": "2", "missing": "5", "distr": [ [ "0", "1" ], [ [ "96206", "5011" ], [ "477307", "16683" ] ] ] }, { "name": "ps_car_03_cat", "index": "24", "type": "nominal", "distinct": "2", "missing": "411231", "distr": [ [ "0", "1" ], [ [ "70375", "2897" ], [ "105362", "5347" ] ] ] }, { "name": "ps_car_04_cat", "index": "25", "type": "nominal", "distinct": "10", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9" ], [ [ "479970", "16611" ], [ "30547", "1568" ], [ "22648", "1122" ], [ "607", "33" ], [ "225", "5" ], [ "506", "39" 
], [ "1468", "92" ], [ "126", "13" ], [ "19565", "1033" ], [ "17856", "1178" ] ] ] }, { "name": "ps_car_05_cat", "index": "26", "type": "nominal", "distinct": "2", "missing": "266551", "distr": [ [ "0", "1" ], [ [ "149737", "6257" ], [ "165682", "6985" ] ] ] }, { "name": "ps_car_06_cat", "index": "27", "type": "nominal", "distinct": "18", "missing": "0", "distr": [ [ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17" ], [ [ "106735", "3685" ], [ "114339", "4047" ], [ "1501", "106" ], [ "11539", "458" ], [ "30082", "1054" ], [ "1307", "94" ], [ "20180", "771" ], [ "15531", "627" ], [ "1314", "98" ], [ "16648", "969" ], [ "31990", "1476" ], [ "127355", "4172" ], [ "2270", "116" ], [ "5890", "356" ], [ "57221", "2032" ], [ "20621", "1111" ], [ "4375", "207" ], [ "4620", "315" ] ] ] }, { "name": "ps_car_07_cat", "index": "28", "type": "nominal", "distinct": "2", "missing": "11489", "distr": [ [ "0", "1" ], [ [ "29010", "1565" ], [ "533917", "19231" ] ] ] }, { "name": "ps_car_08_cat", "index": "29", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "95457", "4491" ], [ "478061", "17203" ] ] ] }, { "name": "ps_car_09_cat", "index": "30", "type": "nominal", "distinct": "5", "missing": "569", "distr": [ [ "0", "1", "2", "3", "4" ], [ [ "188032", "6486" ], [ "27424", "1656" ], [ "340644", "12838" ], [ "14238", "518" ], [ "2663", "144" ] ] ] }, { "name": "ps_car_10_cat", "index": "31", "type": "nominal", "distinct": "3", "missing": "0", "distr": [ [ "0", "1", "2" ], [ [ "4690", "167" ], [ "568659", "21520" ], [ "169", "7" ] ] ] }, { "name": "ps_car_11_cat", "index": "32", "type": "nominal", "distinct": "104", "missing": "0", "distr": [ [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", 
"47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", "101", "102", "103", "104" ], [ [ "3196", "135" ], [ "2456", "97" ], [ "3022", "163" ], [ "1499", "97" ], [ "11988", "525" ], [ "2388", "92" ], [ "5509", "144" ], [ "2388", "93" ], [ "2149", "62" ], [ "8508", "224" ], [ "6530", "186" ], [ "6966", "280" ], [ "3144", "168" ], [ "2566", "108" ], [ "1814", "66" ], [ "6825", "182" ], [ "3747", "179" ], [ "1151", "82" ], [ "4975", "122" ], [ "1765", "83" ], [ "2282", "142" ], [ "8096", "275" ], [ "1604", "64" ], [ "2261", "92" ], [ "4502", "195" ], [ "3333", "117" ], [ "5720", "210" ], [ "12006", "532" ], [ "4305", "162" ], [ "2251", "71" ], [ "5081", "204" ], [ "12272", "306" ], [ "1693", "97" ], [ "4585", "174" ], [ "1417", "64" ], [ "2842", "115" ], [ "4859", "149" ], [ "8960", "286" ], [ "7483", "193" ], [ "4703", "167" ], [ "2943", "239" ], [ "4802", "142" ], [ "3250", "61" ], [ "5479", "149" ], [ "1630", "76" ], [ "5140", "215" ], [ "1952", "83" ], [ "5831", "179" ], [ "5294", "176" ], [ "1745", "80" ], [ "5784", "224" ], [ "3416", "115" ], [ "4519", "138" ], [ "1553", "69" ], [ "3994", "235" ], [ "1202", "70" ], [ "3705", "90" ], [ "1375", "89" ], [ "1706", "64" ], [ "7742", "250" ], [ "2689", "151" ], [ "2810", "84" ], [ "1036", "67" ], [ "21638", "640" ], [ "10571", "334" ], [ "3017", "82" ], [ "6873", "188" ], [ "8100", "288" ], [ "1693", "92" ], [ "7197", "282" ], [ "2230", "127" ], [ "2713", "164" ], [ "1791", "67" ], [ "4823", "152" ], [ "1572", "106" ], [ "2517", "89" ], [ "2325", "80" ], [ "7050", "284" ], [ "1986", "93" ], [ "5484", "226" ], [ "1374", "59" ], [ "10195", "275" ], [ "9138", "372" ], [ "4747", "146" ], [ "6112", "201" ], [ "3595", "124" ], [ "16565", "541" ], [ "4390", 
"133" ], [ "4795", "235" ], [ "5050", "239" ], [ "1765", "73" ], [ "6270", "236" ], [ "2759", "172" ], [ "3186", "165" ], [ "3460", "98" ], [ "2006", "67" ], [ "1396", "88" ], [ "2762", "88" ], [ "11765", "277" ], [ "4212", "233" ], [ "7058", "281" ], [ "2032", "71" ], [ "23615", "647" ], [ "81253", "3830" ] ] ] }, { "name": "ps_car_11", "index": "33", "type": "numeric", "distinct": "4", "missing": "5", "min": "0", "max": "3", "mean": "2", "stdev": "1" }, { "name": "ps_car_12", "index": "34", "type": "numeric", "distinct": "183", "missing": "1", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_car_13", "index": "35", "type": "numeric", "distinct": "70482", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "0" }, { "name": "ps_car_14", "index": "36", "type": "numeric", "distinct": "849", "missing": "42620", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_car_15", "index": "37", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "4", "mean": "3", "stdev": "1" }, { "name": "ps_calc_01", "index": "38", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_calc_02", "index": "39", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_calc_03", "index": "40", "type": "numeric", "distinct": "10", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "ps_calc_04", "index": "41", "type": "numeric", "distinct": "6", "missing": "0", "min": "0", "max": "5", "mean": "2", "stdev": "1" }, { "name": "ps_calc_05", "index": "42", "type": "numeric", "distinct": "7", "missing": "0", "min": "0", "max": "6", "mean": "2", "stdev": "1" }, { "name": "ps_calc_06", "index": "43", "type": "numeric", "distinct": "11", "missing": "0", "min": "0", "max": "10", "mean": "8", "stdev": "1" }, { "name": "ps_calc_07", "index": "44", "type": "numeric", "distinct": "10", 
"missing": "0", "min": "0", "max": "9", "mean": "3", "stdev": "1" }, { "name": "ps_calc_08", "index": "45", "type": "numeric", "distinct": "11", "missing": "0", "min": "2", "max": "12", "mean": "9", "stdev": "1" }, { "name": "ps_calc_09", "index": "46", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "7", "mean": "2", "stdev": "1" }, { "name": "ps_calc_10", "index": "47", "type": "numeric", "distinct": "26", "missing": "0", "min": "0", "max": "25", "mean": "8", "stdev": "3" }, { "name": "ps_calc_11", "index": "48", "type": "numeric", "distinct": "20", "missing": "0", "min": "0", "max": "19", "mean": "5", "stdev": "2" }, { "name": "ps_calc_12", "index": "49", "type": "numeric", "distinct": "11", "missing": "0", "min": "0", "max": "10", "mean": "1", "stdev": "1" }, { "name": "ps_calc_13", "index": "50", "type": "numeric", "distinct": "14", "missing": "0", "min": "0", "max": "13", "mean": "3", "stdev": "2" }, { "name": "ps_calc_14", "index": "51", "type": "numeric", "distinct": "24", "missing": "0", "min": "0", "max": "23", "mean": "8", "stdev": "3" }, { "name": "ps_calc_15_bin", "index": "52", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "503286", "19056" ], [ "70232", "2638" ] ] ] }, { "name": "ps_calc_16_bin", "index": "53", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "213474", "8040" ], [ "360044", "13654" ] ] ] }, { "name": "ps_calc_17_bin", "index": "54", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "255675", "9681" ], [ "317843", "12013" ] ] ] }, { "name": "ps_calc_18_bin", "index": "55", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "408842", "15436" ], [ "164676", "6258" ] ] ] }, { "name": "ps_calc_19_bin", "index": "56", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "373254", "14215" ], [ "200264", "7479" ] ] ] }, { "name": "ps_calc_20_bin", "index": "57", 
"type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "485544", "18411" ], [ "87974", "3283" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }