{ "data_id": "46082", "name": "Thyroid_Disease", "exact_name": "Thyroid_Disease", "version": 1, "version_label": null, "description": "Description:\nThe \"Thyroid_Diff.csv\" dataset is a comprehensive collection of clinical data relating to thyroid diseases. With attributes capturing a wide range of information from patient demographics (age, gender) to specific clinical findings (smoking history, radiotherapy history, thyroid function, physical examination findings), it provides a detailed overview of patients diagnosed with various forms of thyroid conditions. The dataset encapsulates aspects such as the presence of adenopathy, pathology findings, focality of the disease, and risk categorization. Further, it delves into the TNM classification system, providing insights into the size and extent of tumors (T), presence of cancer in nearby lymph nodes (N), and metastasis (M), thereby contributing to the staging of the disease. The clinical response to treatment and recurrence status are also recorded, offering valuable data for outcomes analysis.\n\nAttribute Description:\n- Age: Numeric, represents the age of the patient.\n- Gender: Categorical, 'M' for male, 'F' for female.\n- Smoking: Binary, 'Yes' if the patient has a history of smoking, 'No' otherwise.\n- Hx Smoking: Binary, indicating a historical record of smoking.\n- Hx Radiotherapy: Binary, indicates if the patient has undergone radiotherapy (the column is spelled 'Hx Radiothreapy' in the data).\n- Thyroid Function: Categorical, reports the thyroid's functional state.\n- Physical Examination: Categorical, describes findings from physical examination.\n- Adenopathy: Categorical, records whether adenopathy is present (six distinct values in the data, not a simple Yes/No flag).\n- Pathology: Categorical, type of thyroid pathology diagnosed.\n- Focality: Categorical, 'Multi-Focal' or 'Uni-Focal' disease spread.\n- Risk: Categorical, assessed risk level ('Low', 'Intermediate', 'High').\n- T, N, M: Staging parameters as per the TNM classification.\n- Stage: Categorical, stage of the disease.\n- Response: Categorical, patient's 
response to treatment.\n- Recurred: Binary, 'Yes' if the disease has recurred, 'No' otherwise.\n\nUse Case:\nThis dataset is instrumental for researchers and clinicians focusing on thyroid diseases. Its detailed attributes facilitate analyses on the relationship between demographic factors, lifestyle choices (such as smoking), clinical findings, and treatment outcomes. Furthermore, it can serve as a valuable resource for predictive modeling of disease progression, recurrence, and response to therapy. Machine learning applications can leverage this dataset for developing algorithms that predict patient outcomes, guide treatment plans, and assess risk factors for disease recurrence or poor treatment response.", "format": "arff", "uploader": "Iwo Godzwon", "uploader_id": 39999, "visibility": "public", "creator": "\"None\"", "contributor": "\"Jaina\"", "date": "2024-05-31 13:22:27", "update_comment": null, "last_update": "2024-05-31 13:22:27", "licence": "Attribution (CC BY)", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22120526\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Thyroid_Disease", "Description: The \"Thyroid_Diff.csv\" dataset is a comprehensive collection of clinical data relating to thyroid diseases. With attributes capturing a wide range of information from patient demographics (age, gender) to specific clinical findings (smoking history, radiotherapy history, thyroid function, physical examination findings), it provides a detailed overview of patients diagnosed with various forms of thyroid conditions. 
The dataset encapsulates aspects such as the presence of adenopathy, " ], "weight": 5 }, "qualities": { "NumberOfInstances": 383, "NumberOfFeatures": 17, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 1, "NumberOfSymbolicFeatures": 14, "NumberOfBinaryFeatures": 6, "PercentageOfBinaryFeatures": 35.294117647058826, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": null, "PercentageOfNumericFeatures": 5.88235294117647, "Dimensionality": 0.044386422976501305, "PercentageOfSymbolicFeatures": 82.35294117647058, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null }, "tags": [], "features": [ { "name": "Age", "index": "0", "type": "numeric", "distinct": "65", "missing": "0", "min": "15", "max": "82", "mean": "41", "stdev": "15" }, { "name": "Gender", "index": "1", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "Smoking", "index": "2", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "Hx Smoking", "index": "3", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "Hx Radiothreapy", "index": "4", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "Thyroid Function", "index": "5", "type": "nominal", "distinct": "5", "missing": "0", "distr": [] }, { "name": "Physical Examination", "index": "6", "type": "nominal", "distinct": "5", "missing": "0", "distr": [] }, { "name": "Adenopathy", "index": "7", "type": "nominal", "distinct": "6", "missing": "0", "distr": [] }, { "name": "Pathology", "index": "8", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "Focality", "index": "9", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "Risk", "index": "10", "type": "nominal", "distinct": "3", "missing": "0", "distr": [] }, { "name": "T", "index": 
"11", "type": "string", "distinct": "7", "missing": "0" }, { "name": "N", "index": "12", "type": "nominal", "distinct": "3", "missing": "0", "distr": [] }, { "name": "M", "index": "13", "type": "string", "distinct": "2", "missing": "0" }, { "name": "Stage", "index": "14", "type": "nominal", "distinct": "5", "missing": "0", "distr": [] }, { "name": "Response", "index": "15", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "Recurred", "index": "16", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }