{ "data_id": "203", "name": "lowbwt", "exact_name": "lowbwt", "version": 1, "version_label": "1", "description": "**Author**: \n**Source**: Unknown - \n**Please cite**: \n\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n Identification code deleted. \n\n As used by Kilpatrick, D. & Cameron-Jones, M. (1998). Numeric prediction\n using instance-based learning with encoding length selection. In Progress\n in Connectionist-Based Information Systems. Singapore: Springer-Verlag.\n\n !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n\n NAME: LOW BIRTH WEIGHT DATA\n KEYWORDS: Logistic Regression\n SIZE: 189 observations, 11 variables\n \n NOTE:\n These data come from Appendix 1 of Hosmer and Lemeshow (1989).\n These data are copyrighted and must be acknowledged and used accordingly.\n \n DESCRIPTIVE ABSTRACT:\n The goal of this study was to identify risk factors associated with\n giving birth to a low birth weight baby (weighing less than 2500 grams).\n Data were collected on 189 women, 59 of which had low birth weight babies\n and 130 of which had normal birth weight babies. Four variables which were\n thought to be of importance were age, weight of the subject at her last\n menstrual period, race, and the number of physician visits during the first\n trimester of pregnancy.\n \n \n SOURCE:\n Data were collected at Baystate Medical Center, Springfield,\n Massachusetts, during 1986.\n \n \n NOTE:\n This data set consists of the complete data. A paired data set\n created from this low birth weight data may be found in plowbwt.dat and\n a 3 to 1 matched data set created from the low birth weight data may be\n found in mlowbwt.dat.\n \n \n \n Table: Code Sheet for the Variables in the Low Birth Weight Data Set.\n \n Columns Variable Abbreviation\n -----------------------------------------------------------------------------\n 2-4 Identification Code ID\n \n 10 Low Birth Weight (0 = Birth Weight ge 2500g, LOW\n l = Birth Weight < 2500g)\n \n 17-18 Age of the Mother in Years AGE\n \n 23-25 Weight in Pounds at the Last Menstrual Period LWT\n \n 32 Race (1 = White, 2 = Black, 3 = Other) RACE\n \n 40 Smoking Status During Pregnancy (1 = Yes, 0 = No) SMOKE\n \n 48 History of Premature Labor (0 = None, 1 = One, etc.) PTL\n \n 55 History of Hypertension (1 = Yes, 0 = No) HT\n \n 61 Presence of Uterine Irritability (1 = Yes, 0 = No) UI\n \n 67 Number of Physician Visits During the First Trimester FTV\n (0 = None, 1 = One, 2 = Two, etc.)\n \n 73-76 Birth Weight in Grams BWT\n -----------------------------------------------------------------------------\n \n PEDAGOGICAL NOTES:\n These data have been used as an example of fitting a multiple\n logistic regression model.\n \n STORY BEHIND THE DATA:\n Low birth weight is an outcome that has been of concern to physicians\n for years. This is due to the fact that infant mortality rates and birth\n defect rates are very high for low birth weight babies. A woman's behavior\n during pregnancy (including diet, smoking habits, and receiving prenatal care)\n can greatly alter the chances of carrying the baby to term and, consequently,\n of delivering a baby of normal birth weight.\n The variables identified in the code sheet given in the table have been\n shown to be associated with low birth weight in the obstetrical literature. The\n goal of the current study was to ascertain if these variables were important\n in the population being served by the medical center where the data were\n collected.\n \n \n References:\n \n 1. Hosmer and Lemeshow, Applied Logistic Regression, Wiley, (1989).", "format": "ARFF", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": "Baystate Medical Center,Springfield,Massachusetts", "contributor": "David W. Aha", "date": "2014-04-23 13:17:07", "update_comment": null, "last_update": "2014-04-23 13:17:07", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/3640\/dataset_2189_lowbwt.arff", "kaggle_url": null, "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 4, "suggest": { "input": [ "lowbwt", "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! Identification code deleted. As used by Kilpatrick, D. & Cameron-Jones, M. (1998). Numeric prediction using instance-based learning with encoding length selection. In Progress in Connectionist-Based Information Systems. Singapore: Springer-Verlag. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! NAME: LOW BIRTH WEIGHT DATA KEYWORDS: Logistic Regression SIZE: 189 observations, 11 variables NOTE: T " ], "weight": 5 }, "qualities": { "NumberOfInstances": 189, "NumberOfFeatures": 10, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 3, "NumberOfSymbolicFeatures": 7, "MaxStdDevOfNumericAtts": 729.0224168601321, "MinorityClassPercentage": null, "PercentageOfNumericFeatures": 30, "Quartile3MeansOfNumericAtts": 2944.656084656085, "CfsSubsetEval_DecisionStumpAUC": null, "RandomTreeDepth2AUC": null, "J48.00001.ErrRate": null, "MeanAttributeEntropy": null, "MinorityClassSize": null, "PercentageOfSymbolicFeatures": 70, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": null, "RandomTreeDepth2ErrRate": null, "J48.00001.Kappa": null, "MeanKurtosisOfNumericAtts": 0.9795573041394643, "NaiveBayesAUC": null, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 1.4020067975650343, "CfsSubsetEval_DecisionStumpKappa": null, "RandomTreeDepth2Kappa": null, "J48.0001.AUC": null, "MeanMeansOfNumericAtts": 1032.5696649029983, "NaiveBayesErrRate": null, "Quartile1KurtosisOfNumericAtts": -0.08141574266836882, "Quartile3StdDevOfNumericAtts": 729.0224168601321, "CfsSubsetEval_NaiveBayesAUC": null, "RandomTreeDepth3AUC": null, "J48.0001.ErrRate": null, "MeanMutualInformation": null, "NaiveBayesKappa": null, "Quartile1MeansOfNumericAtts": 23.238095238095237, "REPTreeDepth1AUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "RandomTreeDepth3ErrRate": null, "J48.0001.Kappa": null, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 4, "Quartile1MutualInformation": null, "REPTreeDepth1ErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "RandomTreeDepth3Kappa": null, "J48.001.AUC": null, "MeanNominalAttDistinctValues": 3, "Quartile1SkewnessOfNumericAtts": -0.21017096945202257, "REPTreeDepth1Kappa": null, "CfsSubsetEval_kNN1NAUC": null, "StdvNominalAttDistinctValues": 1.5275252316519468, "J48.001.ErrRate": null, "MeanSkewnessOfNumericAtts": 0.6380064212513571, "Quartile1StdDevOfNumericAtts": 5.298677933404265, "REPTreeDepth2AUC": null, "CfsSubsetEval_kNN1NErrRate": null, "kNN1NAUC": null, "J48.001.Kappa": null, "MeanStdDevOfNumericAtts": 254.9668250487617, "Quartile2AttributeEntropy": null, "REPTreeDepth2ErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "kNN1NErrRate": null, "MajorityClassPercentage": null, "MinAttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": 0.6162022516194381, "REPTreeDepth2Kappa": null, "ClassEntropy": null, "kNN1NKappa": null, "MajorityClassSize": null, "MinKurtosisOfNumericAtts": -0.08141574266836882, "Quartile2MeansOfNumericAtts": 129.81481481481484, "REPTreeDepth3AUC": null, "DecisionStumpAUC": null, "MaxAttributeEntropy": null, "MinMeansOfNumericAtts": 23.238095238095237, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": null, "DecisionStumpErrRate": null, "MaxKurtosisOfNumericAtts": 2.403885403467324, "MinMutualInformation": null, "Quartile2SkewnessOfNumericAtts": 0.7221834356410596, "REPTreeDepth3Kappa": null, "DecisionStumpKappa": null, "MaxMeansOfNumericAtts": 2944.656084656085, "MinNominalAttDistinctValues": 2, "PercentageOfBinaryFeatures": 40, "Quartile2StdDevOfNumericAtts": 30.579380352748757, "RandomTreeDepth1AUC": null, "Dimensionality": 0.05291005291005291, "MaxMutualInformation": null, "MinSkewnessOfNumericAtts": -0.21017096945202257, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": null, "EquivalentNumberOfAtts": null, "MaxNominalAttDistinctValues": 6, "MinStdDevOfNumericAtts": 5.298677933404265, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 2.403885403467324, "AutoCorrelation": -44.3936170212766, "RandomTreeDepth1Kappa": null, "J48.00001.AUC": null, "MaxSkewnessOfNumericAtts": 1.4020067975650343 }, "tags": [ { "uploader": "38960", "tag": "Biostatistics" }, { "uploader": "38960", "tag": "Health" }, { "uploader": "5243", "tag": "OpenML-Reg19" }, { "uploader": "38960", "tag": "Statistics" } ], "features": [ { "name": "class", "index": "9", "type": "numeric", "distinct": "133", "missing": "0", "target": "1", "min": "709", "max": "4990", "mean": "2945", "stdev": "729" }, { "name": "LOW", "index": "0", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "AGE", "index": "1", "type": "numeric", "distinct": "24", "missing": "0", "min": "14", "max": "45", "mean": "23", "stdev": "5" }, { "name": "LWT", "index": "2", "type": "numeric", "distinct": "75", "missing": "0", "min": "80", "max": "250", "mean": "130", "stdev": "31" }, { "name": "RACE", "index": "3", "type": "nominal", "distinct": "3", "missing": "0", "distr": [] }, { "name": "SMOKE", "index": "4", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "PTL", "index": "5", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "HT", "index": "6", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "UI", "index": "7", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "FTV", "index": "8", "type": "nominal", "distinct": "6", "missing": "0", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }