{ "data_id": "43", "name": "haberman", "exact_name": "haberman", "version": 1, "version_label": "1", "description": "**Author**: \r\n**Source**: Unknown - \r\n**Please cite**: \r\n\r\n1. Title: Haberman's Survival Data\r\n \r\n 2. Sources:\r\n (a) Donor: Tjen-Sien Lim (limt@stat.wisc.edu)\r\n (b) Date: March 4, 1999\r\n \r\n 3. Past Usage:\r\n 1. Haberman, S. J. (1976). Generalized Residuals for Log-Linear\r\n Models, Proceedings of the 9th International Biometrics\r\n Conference, Boston, pp. 104-122.\r\n 2. Landwehr, J. M., Pregibon, D., and Shoemaker, A. C. (1984),\r\n Graphical Models for Assessing Logistic Regression Models (with\r\n discussion), Journal of the American Statistical Association 79:\r\n 61-83.\r\n 3. Lo, W.-D. (1993). Logistic Regression Trees, PhD thesis,\r\n Department of Statistics, University of Wisconsin, Madison, WI.\r\n \r\n 4. Relevant Information:\r\n The dataset contains cases from a study that was conducted between\r\n 1958 and 1970 at the University of Chicago's Billings Hospital on\r\n the survival of patients who had undergone surgery for breast\r\n cancer.\r\n \r\n 5. Number of Instances: 306\r\n \r\n 6. Number of Attributes: 4 (including the class attribute)\r\n \r\n 7. Attribute Information:\r\n 1. Age of patient at time of operation (numerical)\r\n 2. Patient's year of operation (year - 1900, numerical)\r\n 3. Number of positive axillary nodes detected (numerical)\r\n 4. Survival status (class attribute)\r\n 1 = the patient survived 5 years or longer\r\n 2 = the patient died within 5 years\r\n \r\n 8. 
Missing Attribute Values: None\r\n\r\n Information about the dataset\r\n CLASSTYPE: nominal\r\n CLASSINDEX: last", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": "University of Chicago's Billings Hospital", "contributor": null, "date": "2014-04-06 23:22:34", "update_comment": null, "last_update": "2014-04-06 23:22:34", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/43\/dataset_43_haberman.arff", "kaggle_url": "https:\/\/www.kaggle.com\/datasets\/gowtamsingulur\/habermancsv", "default_target_attribute": "Survival_status", "row_id_attribute": null, "ignore_attribute": null, "runs": 3243, "suggest": { "input": [ "haberman", "1. Title: Haberman's Survival Data 2. Sources: (a) Donor: Tjen-Sien Lim (limt@stat.wisc.edu) (b) Date: March 4, 1999 3. Past Usage: 1. Haberman, S. J. (1976). Generalized Residuals for Log-Linear Models, Proceedings of the 9th International Biometrics Conference, Boston, pp. 104-122. 2. Landwehr, J. M., Pregibon, D., and Shoemaker, A. C. (1984), Graphical Models for Assessing Logistic Regression Models (with discussion), Journal of the American Statistical Association 79: 61-83. 3. Lo, W.-D. 
(19 " ], "weight": 5 }, "qualities": { "NumberOfInstances": 306, "NumberOfFeatures": 4, "NumberOfClasses": 2, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 2, "MaxStdDevOfNumericAtts": 10.803452349303285, "MinorityClassPercentage": 26.47058823529412, "PercentageOfNumericFeatures": 50, "Quartile3MeansOfNumericAtts": 52.45751633986931, "CfsSubsetEval_DecisionStumpAUC": 0.4958024691358025, "RandomTreeDepth2AUC": 0.5319341563786009, "J48.00001.ErrRate": 0.2679738562091503, "MeanAttributeEntropy": 3.526626058053382, "MinorityClassSize": 81, "PercentageOfSymbolicFeatures": 50, "Quartile3MutualInformation": 0.03592504183782, "CfsSubsetEval_DecisionStumpErrRate": 0.2647058823529412, "RandomTreeDepth2ErrRate": 0.3562091503267974, "J48.00001.Kappa": 0.06880427521710084, "MeanKurtosisOfNumericAtts": 5.570741938126849, "NaiveBayesAUC": 0.6978052126200275, "Quartile1AttributeEntropy": 3.526626058053382, "Quartile3SkewnessOfNumericAtts": 2.9838229044308227, "CfsSubsetEval_DecisionStumpKappa": 0, "RandomTreeDepth2Kappa": 0.07396301849075466, "J48.0001.AUC": 0.5385185185185185, "MeanMeansOfNumericAtts": 28.24183006535949, "NaiveBayesErrRate": 0.24836601307189543, "Quartile1KurtosisOfNumericAtts": -0.5893930383959516, "Quartile3StdDevOfNumericAtts": 10.803452349303285, "CfsSubsetEval_NaiveBayesAUC": 0.4958024691358025, "RandomTreeDepth3AUC": 0.5319341563786009, "J48.0001.ErrRate": 0.2679738562091503, "MeanMutualInformation": 0.03592504183782, "NaiveBayesKappa": 0.19701678060907382, "Quartile1MeansOfNumericAtts": 4.026143790849673, "REPTreeDepth1AUC": 0.5160219478737997, "CfsSubsetEval_NaiveBayesErrRate": 0.2647058823529412, "RandomTreeDepth3ErrRate": 0.3562091503267974, "J48.0001.Kappa": 0.06880427521710084, "MeanNoiseToSignalRatio": 97.16623384807683, "NumberOfBinaryFeatures": 1, "Quartile1MutualInformation": 0.03592504183782, "REPTreeDepth1ErrRate": 0.28431372549019607, 
"CfsSubsetEval_NaiveBayesKappa": 0, "RandomTreeDepth3Kappa": 0.07396301849075466, "J48.001.AUC": 0.5385185185185185, "MeanNominalAttDistinctValues": 7, "Quartile1SkewnessOfNumericAtts": 0.1465050564900938, "REPTreeDepth1Kappa": 0.0172757475083057, "CfsSubsetEval_kNN1NAUC": 0.4958024691358025, "StdvNominalAttDistinctValues": 7.0710678118654755, "J48.001.ErrRate": 0.2679738562091503, "MeanSkewnessOfNumericAtts": 1.5651639804604582, "Quartile1StdDevOfNumericAtts": 7.189653506248565, "REPTreeDepth2AUC": 0.5160219478737997, "CfsSubsetEval_kNN1NErrRate": 0.2647058823529412, "kNN1NAUC": 0.563758573388203, "J48.001.Kappa": 0.06880427521710084, "MeanStdDevOfNumericAtts": 8.996552927775925, "Quartile2AttributeEntropy": 3.526626058053382, "REPTreeDepth2ErrRate": 0.28431372549019607, "CfsSubsetEval_kNN1NKappa": 0, "kNN1NErrRate": 0.3431372549019608, "MajorityClassPercentage": 73.52941176470588, "MinAttributeEntropy": 3.526626058053382, "Quartile2KurtosisOfNumericAtts": 5.570741938126848, "REPTreeDepth2Kappa": 0.0172757475083057, "ClassEntropy": 0.833764907210665, "kNN1NKappa": 0.08602150537634402, "MajorityClassSize": 225, "MinKurtosisOfNumericAtts": -0.5893930383959516, "Quartile2MeansOfNumericAtts": 28.24183006535949, "REPTreeDepth3AUC": 0.5160219478737997, "DecisionStumpAUC": 0.6604938271604939, "MaxAttributeEntropy": 3.526626058053382, "MinMeansOfNumericAtts": 4.026143790849673, "Quartile2MutualInformation": 0.03592504183782, "REPTreeDepth3ErrRate": 0.28431372549019607, "DecisionStumpErrRate": 0.26143790849673204, "MaxKurtosisOfNumericAtts": 11.730876914649649, "MinMutualInformation": 0.03592504183782, "Quartile2SkewnessOfNumericAtts": 1.5651639804604582, "REPTreeDepth3Kappa": 0.0172757475083057, "DecisionStumpKappa": 0.19478981645944363, "MaxMeansOfNumericAtts": 52.45751633986931, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 25, "Quartile2StdDevOfNumericAtts": 8.996552927775925, "RandomTreeDepth1AUC": 0.5319341563786009, "Dimensionality": 
0.013071895424836602, "MaxMutualInformation": 0.03592504183782, "MinSkewnessOfNumericAtts": 0.1465050564900938, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": 3.526626058053382, "RandomTreeDepth1ErrRate": 0.3562091503267974, "EquivalentNumberOfAtts": 23.208460298379418, "MaxNominalAttDistinctValues": 12, "MinStdDevOfNumericAtts": 7.189653506248565, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 11.730876914649649, "AutoCorrelation": 0.7868852459016393, "RandomTreeDepth1Kappa": 0.07396301849075466, "J48.00001.AUC": 0.5385185185185185, "MaxSkewnessOfNumericAtts": 2.9838229044308227 }, "tags": [ { "uploader": "38960", "tag": "Cancer Research" }, { "uploader": "38960", "tag": "Healthcare" }, { "uploader": "2", "tag": "Kaggle" }, { "uploader": "1", "tag": "mythbusting_1" }, { "uploader": "2", "tag": "study_1" }, { "uploader": "3886", "tag": "study_123" }, { "uploader": "4209", "tag": "study_127" }, { "uploader": "939", "tag": "study_15" }, { "uploader": "939", "tag": "study_20" }, { "uploader": "348", "tag": "study_29" }, { "uploader": "348", "tag": "study_30" }, { "uploader": "1", "tag": "study_41" }, { "uploader": "64", "tag": "study_50" }, { "uploader": "64", "tag": "study_52" }, { "uploader": "64", "tag": "study_7" }, { "uploader": "4209", "tag": "study_88" }, { "uploader": "1", "tag": "uci" } ], "features": [ { "name": "Survival_status", "index": "3", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "1", "2" ], [ [ "225", "0" ], [ "0", "81" ] ] ] }, { "name": "Age_of_patient_at_time_of_operation", "index": "0", "type": "numeric", "distinct": "49", "missing": "0", "min": "30", "max": "83", "mean": "52", "stdev": "11" }, { "name": "Patients_year_of_operation", "index": "1", "type": "nominal", "distinct": "12", "missing": "0", "distr": [ [ "58", "59", "60", "61", "62", "63", "64", "65", "66", "67", "68", "69" ], [ [ "24", "12" ], [ "18", "9" ], [ "24", "4" ], [ "23", "3" ], [ "16", "7" ], [ 
"22", "8" ], [ "23", "8" ], [ "15", "13" ], [ "22", "6" ], [ "21", "4" ], [ "10", "3" ], [ "7", "4" ] ] ] }, { "name": "Number_of_positive_axillary_nodes_detected", "index": "2", "type": "numeric", "distinct": "31", "missing": "0", "min": "0", "max": "52", "mean": "4", "stdev": "7" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }