{ "data_id": "43687", "name": "Breast-cancer-prediction", "exact_name": "Breast-cancer-prediction", "version": 1, "version_label": "v1.0", "description": "Context\nThis dataset includes data from a random sample of 20,000 digital and 20,000 film-screen mammograms received by women age 60-89 years within the Breast Cancer Surveillance Consortium (BCSC) between January 2005 and December 2008. Some women contribute multiple examinations to the dataset. Data is useful in teaching about data analysis, epidemiological study designs, or statistical methods for binary outcomes or correlated data.\nContent\nThe data set contains 39998 rows and 13 cols. Attributes are described as follows:\n\n| Field Name | Type (Format) | Description |\n|---|---|---|\n| Age_At_The_Time_Of_Mammography | number | Patient's age in years at time of mammogram |\n| Radiologists_Assessment | string | Radiologist's assessment based on the BI-RADS scale |\n| Is_Binary_Indicator_Of_Cancer_Diagnosis | boolean | Binary indicator of cancer diagnosis within one year of screening mammogram (false = No cancer diagnosis, true = Cancer diagnosis) |\n| Comparison_Mammogram_From_Mammography | string | Comparison mammogram from prior mammography examination available |\n| Patients_BI_RADS_Breast_Density | string | Patient's BI-RADS breast density as recorded at time of mammogram |\n| Family_History_Of_Breast_Cancer | string | Family history of breast cancer in a first degree relative |\n| Current_Use_Of_Hormone_Therapy | string | Current use of hormone therapy at time of mammogram |\n| Binary_Indicator | string | Binary indicator of whether the woman had ever received a prior mammogram |\n| History_Of_Breast_Biopsy | string | Prior history of breast biopsy |\n| Is_Film_Or_Digital_Mammogram | boolean | Film or digital mammogram (true = Digital mammogram, false = Film mammogram) |\n| Cancer_Type | string | Type 
of cancer |\n\nAcknowledgements\nWe acknowledge the Breast Cancer Surveillance Consortium (BCSC) for making this data set available for research purposes.", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 07:12:43", "update_comment": null, "last_update": "2022-03-24 07:12:43", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102512\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"Patients_Study_ID\"", "runs": 0, "suggest": { "input": [ "Breast-cancer-prediction", "Context This dataset includes data from a random sample of 20,000 digital and 20,000 film-screen mammograms received by women age 60-89 years within the Breast Cancer Surveillance Consortium (BCSC) between January 2005 and December 2008. Some women contribute multiple examinations to the dataset. Data is useful in teaching about data analysis, epidemiological study designs, or statistical methods for binary outcomes or correlated data. Content The data set contains 39998 rows and 13 cols. 
Attrib " ], "weight": 5 }, "qualities": { "NumberOfInstances": 39998, "NumberOfFeatures": 12, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 1, "NumberOfSymbolicFeatures": 2, "Dimensionality": 0.0003000150007500375, "PercentageOfNumericFeatures": 8.333333333333332, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 16.666666666666664, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 2, "PercentageOfBinaryFeatures": 16.666666666666664, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Manufacturing" } ], "features": [ { "name": "Age_At_The_Time_Of_Mammography", "index": "0", "type": "numeric", "distinct": "30", "missing": "0", "min": "60", "max": "89", "mean": "70", "stdev": "7" }, { "name": "Radiologists_Assessment", "index": "1", "type": "string", "distinct": "6", "missing": "0" }, { "name": "Is_Binary_Indicator_Of_Cancer_Diagnosis", "index": "2", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "Comparison_Mammogram_From_Mammography", "index": "3", "type": "string", "distinct": "3", "missing": "0" }, { "name": "Patients_BI_RADS_Breast_Density", "index": "4", "type": "string", "distinct": "4", "missing": "0" }, { "name": "Family_History_Of_Breast_Cancer", "index": "5", "type": "string", "distinct": "3", "missing": "0" }, { "name": "Current_Use_Of_Hormone_Therapy", "index": "6", "type": "string", "distinct": "3", "missing": "0" }, { "name": "Binary_Indicator", "index": "7", "type": "string", "distinct": "3", "missing": "0" }, { "name": "History_Of_Breast_Biopsy", "index": "8", "type": "string", "distinct": "3", "missing": "0" }, { "name": "Is_Film_Or_Digital_Mammogram", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": 
[] }, { "name": "Cancer_Type", "index": "10", "type": "string", "distinct": "3", "missing": "0" }, { "name": "Body_Mass_Index", "index": "11", "type": "string", "distinct": "1897", "missing": "0" }, { "name": "Patients_Study_ID", "index": "12", "type": "numeric", "distinct": "36714", "missing": "0", "ignore": "1", "min": "1", "max": "36714", "mean": "18376", "stdev": "10602" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }