{ "data_id": "46076", "name": "AIDS_Virus_Infection_Prediction", "exact_name": "AIDS_Virus_Infection_Prediction", "version": 1, "version_label": null, "description": "Description:\nThe AIDS_Classification_50000.csv dataset is a comprehensive resource specifically compiled for researchers and healthcare professionals focusing on the statistical analysis of AIDS (Acquired Immunodeficiency Syndrome). Composed of 50,000 instances, this dataset encapsulates a broad spectrum of clinical and demographic variables related to AIDS patients. Each record in the dataset holds data across 23 columns, indicating various patient attributes including treatment details, demographic information, clinical test results, and disease progression indicators.\n\nAttribute Description:\n- `time`: Time since the baseline measurement, in days.\n- `trt`: Treatment code (0, 1, 2, 3), where each number signifies a different treatment regimen.\n- `age`: Age of the patient in years.\n- `wtkg`: Weight of the patient in kilograms.\n- `hemo`: Presence of Hemophilia (0 = No, 1 = Yes).\n- `homo`: Homosexual behavior (0 = No, 1 = Yes).\n- `drugs`: Drug use (0 = No, 1 = Yes).\n- `karnof`: Karnofsky score indicating patient's functional impairment (scores range from 0 to 100).\n- `oprior`: Non-zidovudine antiretroviral therapy prior to the study (0 = No, 1 = Yes).\n- `z30`: Zidovudine use in the 30 days prior to study treatment (0 = No, 1 = Yes).\n- `preanti`: Days of antiretroviral therapy received before the study.\n- `race`: Race (0 = Non-white, 1 = White).\n- `gender`: Gender (0 = Female, 1 = Male).\n- `str2`: Stratification variable 2.\n- `strat`: Overall stratification.\n- `symptom`: Presence of specific AIDS-related symptoms (0 = No, 1 = Yes).\n- `treat`: Treatment response (0 = No, 1 = Yes).\n- `offtrt`: Off treatment (0 = No, 1 = Yes).\n- `cd40`: CD4 count at the baseline.\n- `cd420`: CD4 count at 20 weeks.\n- `cd80`: CD8 count at the baseline.\n- `cd820`: CD8 count at 20 weeks.\n- `infected`: HIV infection status (0 = Negative, 1 = 
Positive).\n\nUse Case:\nThis dataset is designed to facilitate a range of scientific inquiries, including the evaluation of treatment efficacy, the identification of prognostic factors for disease progression, and the development of predictive models for patient outcomes. By encompassing a rich variety of data points, the AIDS_Classification_50000.csv supports detailed statistical analyses and machine learning efforts aimed at enhancing our understanding of AIDS. It can particularly serve in the optimization of treatment protocols and in the advancement of targeted therapies, ultimately contributing to improved patient care and management strategies in the field of AIDS research.", "format": "arff", "uploader": "Iwo Godzwon", "uploader_id": 39999, "visibility": "public", "creator": "\"S. Hammer,D. Katzenstein,M.Hughes,H.Gundacker,R. Schooley,R.Haubrich,W. K.,M. Lederman,J. Phair, M. Niu, M. Hirsch, T.Merigan\"", "contributor": "\"None\"", "date": "2024-05-31 12:07:30", "update_comment": null, "last_update": "2024-05-31 12:07:30", "licence": "Public Domain (CC0)", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22120520\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "AIDS_Virus_Infection_Prediction", "Description: The AIDS_Classification_50000.csv dataset is a comprehensive resource specifically compiled for researchers and healthcare professionals focusing on the statistical analysis of AIDS (Acquired Immunodeficiency Syndrome). Composed of 50,000 instances, this dataset encapsulates a broad spectrum of clinical and demographic variables related to AIDS patients. 
Each record in the dataset holds data across 23 columns, indicating various patient attributes including treatment details, demogr " ], "weight": 5 }, "qualities": { "NumberOfInstances": 50000, "NumberOfFeatures": 23, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 23, "NumberOfSymbolicFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0, "Dimensionality": 0.00046, "PercentageOfNumericFeatures": 100, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0 }, "tags": [], "features": [ { "name": "time", "index": "0", "type": "numeric", "distinct": "1094", "missing": "0", "min": "66", "max": "1231", "mean": "877", "stdev": "307" }, { "name": "trt", "index": "1", "type": "numeric", "distinct": "4", "missing": "0", "min": "0", "max": "3", "mean": "1", "stdev": "1" }, { "name": "age", "index": "2", "type": "numeric", "distinct": "57", "missing": "0", "min": "12", "max": "68", "mean": "34", "stdev": "7" }, { "name": "wtkg", "index": "3", "type": "numeric", "distinct": "49623", "missing": "0", "min": "42", "max": "150", "mean": "76", "stdev": "12" }, { "name": "hemo", "index": "4", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "homo", "index": "5", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "drugs", "index": "6", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "karnof", "index": "7", "type": "numeric", "distinct": "13", "missing": "0", "min": "76", "max": "100", "mean": "97", "stdev": "5" }, { "name": "oprior", "index": "8", "type": "numeric", "distinct": "2", "missing": "0", 
"min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "z30", "index": "9", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "preanti", "index": "10", "type": "numeric", "distinct": "1775", "missing": "0", "min": "0", "max": "2828", "mean": "318", "stdev": "403" }, { "name": "race", "index": "11", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "gender", "index": "12", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "str2", "index": "13", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "strat", "index": "14", "type": "numeric", "distinct": "3", "missing": "0", "min": "1", "max": "3", "mean": "2", "stdev": "1" }, { "name": "symptom", "index": "15", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "treat", "index": "16", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "offtrt", "index": "17", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "cd40", "index": "18", "type": "numeric", "distinct": "639", "missing": "0", "min": "0", "max": "930", "mean": "319", "stdev": "103" }, { "name": "cd420", "index": "19", "type": "numeric", "distinct": "902", "missing": "0", "min": "81", "max": "1119", "mean": "438", "stdev": "145" }, { "name": "cd80", "index": "20", "type": "numeric", "distinct": "2710", "missing": "0", "min": "96", "max": "4656", "mean": "1046", "stdev": "489" }, { "name": "cd820", "index": "21", "type": "numeric", "distinct": "2020", "missing": "0", "min": "173", "max": "3538", "mean": "906", "stdev": "340" }, { "name": "infected", "index": "22", "type": "numeric", "distinct": "2", "missing": "0", 
"min": "0", "max": "1", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }