{ "data_id": "43955", "name": "poker", "exact_name": "poker", "version": 3, "version_label": null, "description": "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"classification on numerical features\" benchmark. Original description: \n \n**Author**: UCI \n**Source**: [original](http:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvmtools\/datasets) - \n**Please cite**: \n\nThis is the poker dataset, retrieved 2013-11-14 from the libSVM site. Additional to the preprocessing done there (see LibSVM site for details), this dataset was created as follows: \n\n-join test and train datasets (non-scaled versions) \n-relabel classes 0=positive class and 1,2,...9=negative class \n-normalize each file columnwise according to the following rules: \n-If a column only contains one value (constant feature), it will be set to zero and thus removed by sparsity. \n-If a column contains two values (binary feature), the value occurring more often will be set to zero, the other to one. \n-If a column contains more than two values (multinary\/real feature), the column is divided by its std deviation. \n\nNOTE: please keep in mind that poker has a mild redundancy, e.g. some duplicated data points, roughly 0.2%, within each file (train,test). \nthese duplicated points have not been removed!", "format": "arff", "uploader": "Leo Grin", "uploader_id": 26324, "visibility": "public", "creator": "\"UCI\"", "contributor": "\"Leo Grin\"", "date": "2022-06-15 20:37:43", "update_comment": null, "last_update": "2022-06-15 20:37:43", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/old.openml.org\/data\/download\/22103041\/dataset", "kaggle_url": null, "default_target_attribute": "Y", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "poker", "Dataset used in the tabular data benchmark https:\/\/github.com\/LeoGrin\/tabular-benchmark, transformed in the same way. This dataset belongs to the \"classification on numerical features\" benchmark. Original description: This is the poker dataset, retrieved 2013-11-14 from the libSVM site. Additional to the preprocessing done there (see LibSVM site for details), this dataset was created as follows: -join test and train datasets (non-scaled versions) -relabel classes 0=positive class and 1,2,...9=ne " ], "weight": 5 }, "qualities": { "NumberOfInstances": 1022616, "NumberOfFeatures": 6, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": 0.9999990221148721, "PercentageOfNumericFeatures": 100, "Dimensionality": 5.867305029453872e-6, "PercentageOfSymbolicFeatures": 0, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0 }, "tags": [], "features": [ { "name": "Y", "index": "5", "type": "numeric", "distinct": "2", "missing": "0", "target": "1", "min": "0", "max": "1", "mean": "1", "stdev": "1" }, { "name": "X2", "index": "0", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", 
"max": "3", "mean": "2", "stdev": "1" }, { "name": "X4", "index": "1", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "3", "mean": "2", "stdev": "1" }, { "name": "X6", "index": "2", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "3", "mean": "2", "stdev": "1" }, { "name": "X8", "index": "3", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "3", "mean": "2", "stdev": "1" }, { "name": "X10", "index": "4", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "3", "mean": "2", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }