{ "data_id": "44121", "name": "covertype", "exact_name": "covertype", "version": 12, "version_label": null, "description": "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"classification on numerical features\" benchmark. Original description: \n \n**Author**: Jock A. Blackard, Dr. Denis J. Dean, Dr. Charles W. Anderson \n**Source**: [LibSVM repository](http:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvmtools\/datasets\/) - 2013-11-14 \n**Please cite**: For the binarization: R. Collobert, S. Bengio, and Y. Bengio. A parallel mixture of SVMs for very large scale problems. Neural Computation, 14(05):1105-1114, 2002.\n\nThis is the famous covertype dataset in its binary version, retrieved 2013-11-13 from the libSVM site (called covtype.binary there). Additional to the preprocessing done there (see LibSVM site for details), this dataset was created as follows:\n-load covertype dataset, unscaled.\n-normalize each file columnwise according to the following rules:\n-If a column only contains one value (constant feature), it will be set to zero and thus removed by sparsity.\n-If a column contains two values (binary feature), the value occurring more often will be set to zero, the other to one.\n-If a column contains more than two values (multinary\/real feature), the column is divided by its std deviation.\n-duplicate lines were finally removed.\n\nPreprocessing: Transform from multiclass into binary class.", "format": "arff", "uploader": "Leo Grin", "uploader_id": 26324, "visibility": "public", "creator": "\"Jock A. Blackard\",\"Dr. Denis J. Dean\",\"Dr. Charles W. 
Anderson\"", "contributor": "\"Leo Grin\"", "date": "2022-07-05 20:24:19", "update_comment": null, "last_update": "2022-07-05 20:24:19", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22103246\/dataset", "default_target_attribute": "Y", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "covertype", "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"classification on numerical features\" benchmark. Original description: This is the famous covertype dataset in its binary version, retrieved 2013-11-13 from the libSVM site (called covtype.binary there). Additional to the preprocessing done there (see LibSVM site for details), this dataset was created as follows: -load covertpype dataset, unscaled. -n " ], "weight": 5 }, "qualities": { "NumberOfInstances": 566602, "NumberOfFeatures": 11, "NumberOfClasses": 2, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 10, "NumberOfSymbolicFeatures": 1, "MinorityClassPercentage": 50, "MinorityClassSize": 283301, "NumberOfBinaryFeatures": 1, "PercentageOfBinaryFeatures": 9.090909090909092, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": 0.9999982350895956, "Dimensionality": 1.9413980183620954e-5, "PercentageOfNumericFeatures": 90.9090909090909, "MajorityClassPercentage": 50, "PercentageOfSymbolicFeatures": 9.090909090909092, "MajorityClassSize": 283301 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Mathematics" } ], "features": [ { "name": "Y", "index": "10", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "0", "1" ], [ [ "283301", "0" ], [ "0", "283301" ] ] ] }, { "name": "X1", "index": "0", "type": "numeric", "distinct": "1971", "missing": "0", "min": "7", 
"max": "14", "mean": "11", "stdev": "1" }, { "name": "X2", "index": "1", "type": "numeric", "distinct": "361", "missing": "0", "min": "0", "max": "3", "mean": "1", "stdev": "1" }, { "name": "X3", "index": "2", "type": "numeric", "distinct": "67", "missing": "0", "min": "0", "max": "9", "mean": "2", "stdev": "1" }, { "name": "X4", "index": "3", "type": "numeric", "distinct": "551", "missing": "0", "min": "0", "max": "7", "mean": "1", "stdev": "1" }, { "name": "X5", "index": "4", "type": "numeric", "distinct": "700", "missing": "0", "min": "-3", "max": "10", "mean": "1", "stdev": "1" }, { "name": "X6", "index": "5", "type": "numeric", "distinct": "5785", "missing": "0", "min": "0", "max": "5", "mean": "2", "stdev": "1" }, { "name": "X7", "index": "6", "type": "numeric", "distinct": "207", "missing": "0", "min": "0", "max": "9", "mean": "8", "stdev": "1" }, { "name": "X8", "index": "7", "type": "numeric", "distinct": "185", "missing": "0", "min": "0", "max": "13", "mean": "11", "stdev": "1" }, { "name": "X9", "index": "8", "type": "numeric", "distinct": "255", "missing": "0", "min": "0", "max": "7", "mean": "4", "stdev": "1" }, { "name": "X10", "index": "9", "type": "numeric", "distinct": "5827", "missing": "0", "min": "0", "max": "5", "mean": "1", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }