{ "data_id": "43387", "name": "Oranges-vs.-Grapefruit", "exact_name": "Oranges-vs.-Grapefruit", "version": 1, "version_label": "v1.0", "description": "Oranges vs. Grapefruit\nThe task of separating oranges and grapefruit is fairly obvious to a human, but even with manual observation there is still a bit of error. This dataset takes the color, weight, and diameter of an \"average\" orange and grapefruit and generates a larger dataset containing a wide variety of values and are \"oranges\" and \"grapefruit\".\nContent\nThe dataset is mostly fictional. I'd love to collect real data, but for now measuring starting fruit and creating artificial samples from there seems adequate.\nInspiration\nBinary classification situations are numerous, but tricky for teaching situations. I needed something to create a nice binary classification dataset and still be interesting.", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:52:29", "update_comment": null, "last_update": "2022-03-23 12:52:29", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102212\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Oranges-vs.-Grapefruit", "Oranges vs. Grapefruit The task of separating oranges and grapefruit is fairly obvious to a human, but even with manual observation there is still a bit of error. This dataset takes the color, weight, and diameter of an \"average\" orange and grapefruit and generates a larger dataset containing a wide variety of values and are \"oranges\" and \"grapefruit\". Content The dataset is mostly fictional. I'd love to collect real data, but for now measuring starting fruit and creating artificial samples from " ], "weight": 5 }, "qualities": { "NumberOfInstances": 10000, "NumberOfFeatures": 6, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 5, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0006, "PercentageOfNumericFeatures": 83.33333333333334, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "name", "index": "0", "type": "string", "distinct": "2", "missing": "0" }, { "name": "diameter", "index": "1", "type": "numeric", "distinct": "940", "missing": "0", "min": "3", "max": "16", "mean": "10", "stdev": "2" }, { "name": "weight", "index": "2", "type": "numeric", "distinct": "6627", "missing": "0", "min": "87", "max": "262", "mean": "175", "stdev": "29" }, { "name": "red", "index": "3", "type": "numeric", "distinct": "75", "missing": "0", "min": "115", "max": "192", "mean": "154", "stdev": "10" }, { "name": "green", "index": "4", "type": "numeric", "distinct": "80", "missing": "0", "min": "31", "max": "116", "mean": "76", "stdev": "12" }, { "name": "blue", "index": "5", "type": "numeric", "distinct": "48", "missing": "0", "min": "2", "max": "56", "mean": "11", "stdev": "9" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }