{ "data_id": "42911", "name": "risk-factors-cervical", "exact_name": "risk-factors-cervical", "version": 1, "version_label": "1", "description": "**Author**: Kelwin Fernandes, Jaime S. Cardoso, Jessica Fernandes\n**Source**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Cervical+cancer+%28Risk+Factors%29) - 2017\n**Please cite**: [Paper](https:\/\/link.springer.com\/chapter\/10.1007\/978-3-319-58838-4_27) \n\n**Cervical cancer (Risk Factors) Data Set**\n\nThe dataset was collected at 'Hospital Universitario de Caracas' in Caracas, Venezuela. The dataset comprises demographic information, habits, and historic medical records of 858 patients. Several patients decided not to answer some of the questions because of privacy concerns (missing values).\n\n### Attribute information\n\n- (int) Age \n- (int) Number of sexual partners \n- (int) First sexual intercourse (age) \n- (int) Num of pregnancies \n- (bool) Smokes \n- (int) Smokes (years) \n- (int) Smokes (packs\/year) \n- (bool) Hormonal Contraceptives \n- (int) Hormonal Contraceptives (years) \n- (bool) IUD \n- (int) IUD (years) \n- (bool) STDs \n- (int) STDs (number) \n- (bool) STDs:condylomatosis \n- (bool) STDs:cervical condylomatosis \n- (bool) STDs:vaginal condylomatosis \n- (bool) STDs:vulvo-perineal condylomatosis \n- (bool) STDs:syphilis \n- (bool) STDs:pelvic inflammatory disease \n- (bool) STDs:genital herpes \n- (bool) STDs:molluscum contagiosum \n- (bool) STDs:AIDS \n- (bool) STDs:HIV \n- (bool) STDs:Hepatitis B \n- (bool) STDs:HPV \n- (int) STDs: Number of diagnosis \n- (int) STDs: Time since first diagnosis \n- (int) STDs: Time since last diagnosis \n- (bool) Dx:Cancer \n- (bool) Dx:CIN \n- (bool) Dx:HPV \n- (bool) Dx \n- (bool) Hinselmann: target variable \n- (bool) Schiller: target variable \n- (bool) Cytology: target variable \n- (bool) Biopsy: target variable", "format": "arff", "uploader": "Meilina Reksoprodjo", "uploader_id": 24140, "visibility": "public", "creator": "Kelwin Fernandes, 
Jaime S. Cardoso, Jessica Fernandes", "contributor": null, "date": "2021-05-19 21:39:50", "update_comment": null, "last_update": "2021-05-19 21:39:50", "licence": "CC-BY", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22045542\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "risk-factors-cervical", "The dataset was collected at 'Hospital Universitario de Caracas' in Caracas, Venezuela. The dataset comprises demographic information, habits, and historic medical records of 858 patients. Several patients decided not to answer some of the questions because of privacy concerns (missing values). ### Attribute information - (int) Age - (int) Number of sexual partners - (int) First sexual intercourse (age) - (int) Num of pregnancies - (bool) Smokes - (bool) Smokes (years) - (bool) Smokes (packs\/yea " ], "weight": 5 }, "qualities": { "NumberOfInstances": 858, "NumberOfFeatures": 36, "NumberOfClasses": null, "NumberOfMissingValues": 3622, "NumberOfInstancesWithMissingValues": 799, "NumberOfNumericFeatures": 10, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.04195804195804196, "PercentageOfNumericFeatures": 27.77777777777778, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 93.12354312354313, "AutoCorrelation": null, "PercentageOfMissingValues": 11.726236726236726 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "Age", "index": "0", "type": "numeric", "distinct": "44", "missing": "0", "min": "13", "max": "84", "mean": "27", "stdev": "8" }, { "name": "Number of sexual partners", "index": "1", "type": "string", "distinct": "12", "missing": "26" }, { "name": "First sexual intercourse", "index": 
"2", "type": "string", "distinct": "21", "missing": "7" }, { "name": "Num of pregnancies", "index": "3", "type": "string", "distinct": "11", "missing": "56" }, { "name": "Smokes", "index": "4", "type": "string", "distinct": "2", "missing": "13" }, { "name": "Smokes (years)", "index": "5", "type": "string", "distinct": "30", "missing": "13" }, { "name": "Smokes (packs\/year)", "index": "6", "type": "string", "distinct": "62", "missing": "13" }, { "name": "Hormonal Contraceptives", "index": "7", "type": "string", "distinct": "2", "missing": "108" }, { "name": "Hormonal Contraceptives (years)", "index": "8", "type": "string", "distinct": "40", "missing": "108" }, { "name": "IUD", "index": "9", "type": "string", "distinct": "2", "missing": "117" }, { "name": "IUD (years)", "index": "10", "type": "string", "distinct": "26", "missing": "117" }, { "name": "STDs", "index": "11", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs (number)", "index": "12", "type": "string", "distinct": "5", "missing": "105" }, { "name": "STDs:condylomatosis", "index": "13", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:cervical condylomatosis", "index": "14", "type": "string", "distinct": "1", "missing": "105" }, { "name": "STDs:vaginal condylomatosis", "index": "15", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:vulvo-perineal condylomatosis", "index": "16", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:syphilis", "index": "17", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:pelvic inflammatory disease", "index": "18", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:genital herpes", "index": "19", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:molluscum contagiosum", "index": "20", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:AIDS", "index": "21", "type": "string", "distinct": "1", "missing": "105" }, 
{ "name": "STDs:HIV", "index": "22", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:Hepatitis B", "index": "23", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs:HPV", "index": "24", "type": "string", "distinct": "2", "missing": "105" }, { "name": "STDs: Number of diagnosis", "index": "25", "type": "numeric", "distinct": "4", "missing": "0", "min": "0", "max": "3", "mean": "0", "stdev": "0" }, { "name": "STDs: Time since first diagnosis", "index": "26", "type": "string", "distinct": "18", "missing": "787" }, { "name": "STDs: Time since last diagnosis", "index": "27", "type": "string", "distinct": "18", "missing": "787" }, { "name": "Dx:Cancer", "index": "28", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Dx:CIN", "index": "29", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Dx:HPV", "index": "30", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Dx", "index": "31", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Hinselmann", "index": "32", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Schiller", "index": "33", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Citology", "index": "34", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Biopsy", "index": "35", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }