{ "data_id": "15", "name": "breast-w", "exact_name": "breast-w", "version": 1, "version_label": "1", "description": "**Author**: Dr. William H. Wolberg, University of Wisconsin \r\n**Source**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/breast+cancer+wisconsin+(original)), [University of Wisconsin](http:\/\/pages.cs.wisc.edu\/~olvi\/uwmp\/cancer.html) - 1995 \r\n**Please cite**: See below, plus [UCI](https:\/\/archive.ics.uci.edu\/ml\/citation_policy.html) \r\n\r\n**Breast Cancer Wisconsin (Original) Data Set.** Features are computed from a digitized image of a fine needle aspirate (FNA) of a breast mass. They describe characteristics of the cell nuclei present in the image. The target feature records the prognosis (malignant or benign). [Original data available here](ftp:\/\/ftp.cs.wisc.edu\/math-prog\/cpo-dataset\/machine-learn\/cancer\/) \r\n\r\nCurrent dataset was adapted to ARFF format from the UCI version. Sample code ID's were removed. \r\n\r\n! Note that there is also a related Breast Cancer Wisconsin (Diagnosis) Data Set with a different set of features, better known as [wdbc](https:\/\/www.openml.org\/d\/1510).\r\n\r\n### Relevant Papers \r\n\r\nW.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction for breast tumor diagnosis. IS&T\/SPIE 1993 International Symposium on Electronic Imaging: Science and Technology, volume 1905, pages 861-870, San Jose, CA, 1993. \r\n\r\nO.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and prognosis via linear programming. Operations Research, 43(4), pages 570-577, July-August 1995. \r\n\r\n### Citation request \r\n\r\nThis breast cancer database was obtained from the University of Wisconsin Hospitals, Madison from Dr. William H. Wolberg. If you publish results when using this database, then please include this information in your acknowledgments. Also, please cite one or more of:\r\n\r\n 1. O. L. Mangasarian and W. H. Wolberg: \"Cancer diagnosis via linear \r\n programming\", SIAM News, Volume 23, Number 5, September 1990, pp 1 & 18.\r\n\r\n 2. William H. Wolberg and O.L. Mangasarian: \"Multisurface method of \r\n pattern separation for medical diagnosis applied to breast cytology\", \r\n Proceedings of the National Academy of Sciences, U.S.A., Volume 87, \r\n December 1990, pp 9193-9196.\r\n\r\n 3. O. L. Mangasarian, R. Setiono, and W.H. Wolberg: \"Pattern recognition \r\n via linear programming: Theory and application to medical diagnosis\", \r\n in: \"Large-scale numerical optimization\", Thomas F. Coleman and Yuying\r\n Li, editors, SIAM Publications, Philadelphia 1990, pp 22-30.\r\n\r\n 4. K. P. Bennett & O. L. Mangasarian: \"Robust linear programming \r\n discrimination of two linearly inseparable sets\", Optimization Methods\r\n and Software 1, 1992, 23-34 (Gordon & Breach Science Publishers).", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": "Dr. William H. Wolberg", "contributor": "Olvi Mangasarian", "date": "2014-04-06 23:20:20", "update_comment": "added special attributes", "last_update": "2014-09-21 23:04:47", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/52350\/openml_phpJNxH0q", "default_target_attribute": "Class", "row_id_attribute": null, "ignore_attribute": null, "runs": 28732, "suggest": { "input": [ "breast-w", "Current dataset was adapted to ARFF format from the UCI version. Sample code ID's were removed. ! Note that there is also a related Breast Cancer Wisconsin (Diagnosis) Data Set with a different set of features, better known as [wdbc](https:\/\/www.openml.org\/d\/1510). ### Relevant Papers W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction for breast tumor diagnosis. IS&T\/SPIE 1993 International Symposium on Electronic Imaging: Science and Technology, volume 1905, pages 861-87 " ], "weight": 5 }, "qualities": { "NumberOfInstances": 699, "NumberOfFeatures": 10, "NumberOfClasses": 2, "NumberOfMissingValues": 16, "NumberOfInstancesWithMissingValues": 16, "NumberOfNumericFeatures": 9, "NumberOfSymbolicFeatures": 1, "Quartile2AttributeEntropy": null, "REPTreeDepth2ErrRate": 0.055793991416309016, "CfsSubsetEval_kNN1NKappa": 0.876873811577773, "kNN1NErrRate": 0.04291845493562232, "MajorityClassPercentage": 65.52217453505007, "MeanStdDevOfNumericAtts": 2.7510805456186183, "Quartile2KurtosisOfNumericAtts": 0.18462131150511896, "REPTreeDepth2Kappa": 0.8756551129599474, "ClassEntropy": 0.9293179372497983, "kNN1NKappa": 0.9044446672195541, "MajorityClassSize": 458, "MinAttributeEntropy": null, "Quartile2MeansOfNumericAtts": 3.2074391988555093, "REPTreeDepth3AUC": 0.952780445378608, "DecisionStumpAUC": 0.9249080432694923, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -0.7988441354436415, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": 0.055793991416309016, "DecisionStumpErrRate": 0.08869814020028613, "MaxKurtosisOfNumericAtts": 12.6578780670256, "MinMeansOfNumericAtts": 1.589413447782546, "Quartile2SkewnessOfNumericAtts": 1.233136557961704, "REPTreeDepth3Kappa": 0.8756551129599474, "DecisionStumpKappa": 0.8107527445175151, "MaxMeansOfNumericAtts": 4.417739628040058, "MinMutualInformation": null, "PercentageOfBinaryFeatures": 10, "Quartile2StdDevOfNumericAtts": 2.85537923921701, "RandomTreeDepth1AUC": 0.9516904855835366, "Dimensionality": 0.01430615164520744, "MaxMutualInformation": null, "MinNominalAttDistinctValues": 2, "PercentageOfInstancesWithMissingValues": 2.28898426323319, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": 0.044349070100143065, "EquivalentNumberOfAtts": null, "MaxNominalAttDistinctValues": 2, "MinSkewnessOfNumericAtts": 0.5928585326862892, "PercentageOfMissingValues": 0.22889842632331905, "Quartile3KurtosisOfNumericAtts": 1.57850674634774, "AutoCorrelation": 0.6318051575931232, "RandomTreeDepth1Kappa": 0.9017452695441621, "J48.00001.AUC": 0.9269600826251608, "MaxSkewnessOfNumericAtts": 3.560657844495268, "MinStdDevOfNumericAtts": 1.7150779425067932, "PercentageOfNumericFeatures": 90, "Quartile3MeansOfNumericAtts": 3.49121208503258, "CfsSubsetEval_DecisionStumpAUC": 0.9269600826251608, "RandomTreeDepth2AUC": 0.9516904855835366, "J48.00001.ErrRate": 0.055793991416309016, "MaxStdDevOfNumericAtts": 3.643857160492903, "MinorityClassPercentage": 34.47782546494993, "PercentageOfSymbolicFeatures": 10, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": 0.055793991416309016, "RandomTreeDepth2ErrRate": 0.044349070100143065, "J48.00001.Kappa": 0.876873811577773, "MeanAttributeEntropy": null, "MinorityClassSize": 241, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 1.618319946298536, "CfsSubsetEval_DecisionStumpKappa": 0.876873811577773, "RandomTreeDepth2Kappa": 0.9017452695441621, "J48.0001.AUC": 0.9269600826251608, "MeanKurtosisOfNumericAtts": 1.6841150960407236, "NaiveBayesAUC": 0.9883078465880973, "Quartile1KurtosisOfNumericAtts": -0.3083522161398966, "Quartile3StdDevOfNumericAtts": 3.0525465017834734, "CfsSubsetEval_NaiveBayesAUC": 0.9269600826251608, "RandomTreeDepth3AUC": 0.9516904855835366, "J48.0001.ErrRate": 0.055793991416309016, "MeanMeansOfNumericAtts": 3.1357041002822355, "NaiveBayesErrRate": 0.04005722460658083, "Quartile1MeansOfNumericAtts": 2.836909871244634, "REPTreeDepth1AUC": 0.952780445378608, "CfsSubsetEval_NaiveBayesErrRate": 0.055793991416309016, "RandomTreeDepth3ErrRate": 0.044349070100143065, "J48.0001.Kappa": 0.876873811577773, "MeanMutualInformation": null, "NaiveBayesKappa": 0.9125445722405428, "Quartile1MutualInformation": null, "REPTreeDepth1ErrRate": 0.055793991416309016, "CfsSubsetEval_NaiveBayesKappa": 0.876873811577773, "RandomTreeDepth3Kappa": 0.9017452695441621, "J48.001.AUC": 0.9269600826251608, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 1, "Quartile1SkewnessOfNumericAtts": 1.0449923682055797, "REPTreeDepth1Kappa": 0.8756551129599474, "CfsSubsetEval_kNN1NAUC": 0.9269600826251608, "StdvNominalAttDistinctValues": 0, "J48.001.ErrRate": 0.055793991416309016, "MeanNominalAttDistinctValues": 2, "Quartile1StdDevOfNumericAtts": 2.3263320694866376, "REPTreeDepth2AUC": 0.952780445378608, "CfsSubsetEval_kNN1NErrRate": 0.055793991416309016, "kNN1NAUC": 0.9752215115330954, "J48.001.Kappa": 0.876873811577773, "MeanSkewnessOfNumericAtts": 1.477488666731089 }, "tags": [ { "uploader": "2", "tag": "cancer" }, { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "2", "tag": "medical" }, { "uploader": "38960", "tag": "Medicine" }, { "uploader": "1", "tag": "mythbusting_1" }, { "uploader": "1", "tag": "OpenML-CC18" }, { "uploader": "348", "tag": "OpenML100" }, { "uploader": "2", "tag": "study_1" }, { "uploader": "3886", "tag": "study_123" }, { "uploader": "5824", "tag": "study_135" }, { "uploader": "64", "tag": "study_14" }, { "uploader": "939", "tag": "study_15" }, { "uploader": "939", "tag": "study_20" }, { "uploader": "1", "tag": "study_34" }, { "uploader": "1", "tag": "study_37" }, { "uploader": "1", "tag": "study_41" }, { "uploader": "64", "tag": "study_50" }, { "uploader": "64", "tag": "study_52" }, { "uploader": "1856", "tag": "study_70" }, { "uploader": "1935", "tag": "study_98" }, { "uploader": "1", "tag": "study_99" }, { "uploader": "1", "tag": "uci" } ], "features": [ { "name": "Class", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "benign", "malignant" ], [ [ "458", "0" ], [ "0", "241" ] ] ] }, { "name": "Clump_Thickness", "index": "0", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "4", "stdev": "3" }, { "name": "Cell_Size_Uniformity", "index": "1", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "3" }, { "name": "Cell_Shape_Uniformity", "index": "2", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "3" }, { "name": "Marginal_Adhesion", "index": "3", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "3" }, { "name": "Single_Epi_Cell_Size", "index": "4", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "2" }, { "name": "Bare_Nuclei", "index": "5", "type": "numeric", "distinct": "10", "missing": "16", "min": "1", "max": "10", "mean": "4", "stdev": "4" }, { "name": "Bland_Chromatin", "index": "6", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "2" }, { "name": "Normal_Nucleoli", "index": "7", "type": "numeric", "distinct": "10", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "3" }, { "name": "Mitoses", "index": "8", "type": "numeric", "distinct": "9", "missing": "0", "min": "1", "max": "10", "mean": "2", "stdev": "2" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }