{ "data_id": "43707", "name": "Govt.-of-India-Census-2001-District-Wise", "exact_name": "Govt.-of-India-Census-2001-District-Wise", "version": 1, "version_label": "v1.0", "description": "Context\nCensus of India is a rich database which can tell stories of over a billion Indians. It is important not only for research point of view, but commercially as well for the organizations that want to understand India's complex yet strongly knitted heterogeneity. \nHowever, nowhere on the web, there exists a single database that combines the district- wise information of all the variables (most include no more than 4-5 out of over 50 variables!). Extracting and using data from Census of India 2001 is quite a laborious task since all data is made available in scattered PDFs district wise. Individual PDFs can be extracted from http:\/\/www.censusindia.gov.in\/(S(ogvuk1y2e5sueoyc5eyc0g55))\/Tables_Published\/Basic_Data_Sheet.aspx. \nContent\nThis database has been extracted from Census of 2001 and includes data of 590 districts, having around 80 variables each. \nIn case of confusion regarding the context of the variable, refer to the following PDF and you will be able to make sense out of it: http:\/\/censusindia.gov.in\/Dist_File\/datasheet-2923.pdf \nAll the extraction work can be found https:\/\/github.com\/preetskhalsa97\/census2001auto \nThe final CSV can be found at finalCSV\/all.csv\nThe subtle hack that was used to automate extraction to a great extent was the the URLs of all the PDFs were same except the four digits (that were respective state and district codes). \nA few abbreviations used for states:\nAN- Andaman and Nicobar\nCG- Chhattisgarh\nDD- Daman and Diu\nDN_H- Dadra and Nagar Haveli\nJK- Jammu and Kashmir\nMP- Madhya Pradesh\nTN- Tamil Nadu\nUP- Uttar Pradesh\nWB- West Bengal \nA few variables for clarification: \nGrowth..19912001- population growth from 1991 to 2001\nX0..4 years- People in age group 0 to 4 years\nSC1- Scheduled Class with highest population\nAcknowledgements\nInspiration\nThis is a massive dataset which can be used to explain the interplay between education, caste, development, gender and much more. \nIt really can explain a lot about India and propel data driven research. \nHappy Number Crunching!", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 07:25:22", "update_comment": null, "last_update": "2022-03-24 07:25:22", "licence": "Database: Open Database, Contents: Database Contents", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102532\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Govt.-of-India-Census-2001-District-Wise", "Context Census of India is a rich database which can tell stories of over a billion Indians. It is important not only for research point of view, but commercially as well for the organizations that want to understand India's complex yet strongly knitted heterogeneity. However, nowhere on the web, there exists a single database that combines the district- wise information of all the variables (most include no more than 4-5 out of over 50 variables!). Extracting and using data from Census of India " ], "weight": 5 }, "qualities": { "NumberOfInstances": 590, "NumberOfFeatures": 82, "NumberOfClasses": null, "NumberOfMissingValues": 3219, "NumberOfInstancesWithMissingValues": 582, "NumberOfNumericFeatures": 47, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.13898305084745763, "PercentageOfNumericFeatures": 57.3170731707317, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 98.64406779661017, "AutoCorrelation": null, "PercentageOfMissingValues": 6.653575857792476 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Social Media" } ], "features": [ { "name": "Unnamed:_0", "index": "0", "type": "numeric", "distinct": "590", "missing": "0", "min": "1", "max": "590", "mean": "296", "stdev": "170" }, { "name": "State", "index": "1", "type": "string", "distinct": "35", "missing": "0" }, { "name": "District", "index": "2", "type": "string", "distinct": "589", "missing": "0" }, { "name": "Persons", "index": "3", "type": "numeric", "distinct": "590", "missing": "0", "min": "31394", "max": "9610788", "mean": "1732902", "stdev": "1329998" }, { "name": "Males", "index": "4", "type": "numeric", "distinct": "590", "missing": "0", "min": "15893", "max": "4916370", "mean": "896660", "stdev": "693414" }, { "name": "Females", "index": "5", "type": "numeric", "distinct": "590", "missing": "0", "min": "14783", "max": "4694418", "mean": "836242", "stdev": "638168" }, { "name": "Growth..1991...2001.", "index": "6", "type": "string", "distinct": "540", "missing": "0" }, { "name": "Rural", "index": "7", "type": "string", "distinct": "582", "missing": "0" }, { "name": "Urban", "index": "8", "type": "string", "distinct": "60", "missing": "522" }, { "name": "Scheduled.Caste.population", "index": "9", "type": "string", "distinct": "14", "missing": "563" }, { "name": "Percentage...SC.to.total", "index": "10", "type": "string", "distinct": "13", "missing": "562" }, { "name": "Number.of.households", "index": "11", "type": "numeric", "distinct": "587", "missing": "3", "min": "6054", "max": "1838426", "mean": "325823", "stdev": "266637" }, { "name": "Household.size..per.household.", "index": "12", "type": "numeric", "distinct": "5", "missing": "3", "min": "4", "max": "8", "mean": "5", "stdev": "1" }, { "name": "Sex.ratio..females.per.1000.males.", "index": "13", "type": "numeric", "distinct": "219", "missing": "3", "min": "591", "max": "1147", "mean": "935", "stdev": "63" }, { "name": "Sex.ratio..0.6.years.", "index": "14", "type": "numeric", "distinct": "160", "missing": "3", "min": "766", "max": "1035", "mean": "929", "stdev": "48" }, { "name": "Scheduled.Tribe.population", "index": "15", "type": "string", "distinct": "538", "missing": "3" }, { "name": "Percentage.to.total.population..ST.", "index": "16", "type": "string", "distinct": "406", "missing": "3" }, { "name": "Persons..literate", "index": "17", "type": "numeric", "distinct": "590", "missing": "0", "min": "13424", "max": "6617264", "mean": "944285", "stdev": "843797" }, { "name": "Males..Literate", "index": "18", "type": "numeric", "distinct": "590", "missing": "0", "min": "8733", "max": "3853376", "mean": "567073", "stdev": "484530" }, { "name": "Females..Literate", "index": "19", "type": "numeric", "distinct": "590", "missing": "0", "min": "4177", "max": "2763888", "mean": "377212", "stdev": "364677" }, { "name": "Persons..literacy.rate", "index": "20", "type": "numeric", "distinct": "551", "missing": "0", "min": "30", "max": "97", "mean": "64", "stdev": "13" }, { "name": "Males..Literatacy.Rate", "index": "21", "type": "numeric", "distinct": "557", "missing": "0", "min": "40", "max": "98", "mean": "75", "stdev": "11" }, { "name": "Females..Literacy.Rate", "index": "22", "type": "numeric", "distinct": "557", "missing": "0", "min": "19", "max": "96", "mean": "53", "stdev": "16" }, { "name": "Total.Educated", "index": "23", "type": "numeric", "distinct": "587", "missing": "3", "min": "13424", "max": "6617264", "mean": "944478", "stdev": "845908" }, { "name": "Data.without.level", "index": "24", "type": "numeric", "distinct": "582", "missing": "3", "min": "105", "max": "877837", "mean": "33894", "stdev": "53235" }, { "name": "Below.Primary", "index": "25", "type": "numeric", "distinct": "587", "missing": "3", "min": "3821", "max": "2465907", "mean": "243767", "stdev": "220385" }, { "name": "Primary", "index": "26", "type": "numeric", "distinct": "587", "missing": "3", "min": "3146", "max": "1586123", "mean": "246428", "stdev": "212034" }, { "name": "Middle", "index": "27", "type": "numeric", "distinct": "587", "missing": "3", "min": "2481", "max": "1133954", "mean": "152429", "stdev": "141847" }, { "name": "Matric.Higher.Secondary.Diploma", "index": "28", "type": "numeric", "distinct": "587", "missing": "3", "min": "2497", "max": "2041567", "mean": "204171", "stdev": "218622" }, { "name": "Graduate.and.Above", "index": "29", "type": "numeric", "distinct": "586", "missing": "3", "min": "415", "max": "868911", "mean": "63622", "stdev": "91954" }, { "name": "X0...4.years", "index": "30", "type": "numeric", "distinct": "586", "missing": "3", "min": "2547", "max": "918006", "mean": "186595", "stdev": "137786" }, { "name": "X5...14.years", "index": "31", "type": "numeric", "distinct": "587", "missing": "3", "min": "5764", "max": "2333062", "mean": "427263", "stdev": "320536" }, { "name": "X15...59.years", "index": "32", "type": "numeric", "distinct": "587", "missing": "3", "min": "18019", "max": "5758483", "mean": "986586", "stdev": "799386" }, { "name": "X60.years.and.above..Incl..A.N.S..", "index": "33", "type": "numeric", "distinct": "586", "missing": "3", "min": "2178", "max": "743405", "mean": "133762", "stdev": "104830" }, { "name": "Total.workers", "index": "34", "type": "numeric", "distinct": "590", "missing": "0", "min": "9308", "max": "3751057", "mean": "677575", "stdev": "507492" }, { "name": "Main.workers", "index": "35", "type": "numeric", "distinct": "590", "missing": "0", "min": "8420", "max": "2971039", "mean": "527189", "stdev": "429006" }, { "name": "Marginal.workers", "index": "36", "type": "numeric", "distinct": "589", "missing": "0", "min": "611", "max": "1220945", "mean": "150386", "stdev": "110570" }, { "name": "Non.workers", "index": "37", "type": "numeric", "distinct": "590", "missing": "0", "min": "12136", "max": "5946126", "mean": "1055328", "stdev": "851222" }, { "name": "SC.1.Name", "index": "38", "type": "string", "distinct": "76", "missing": "51" }, { "name": "SC.1.Population", "index": "39", "type": "numeric", "distinct": "575", "missing": "13", "min": "2", "max": "1396353", "mean": "152126", "stdev": "159119" }, { "name": "SC.2.Name", "index": "40", "type": "string", "distinct": "111", "missing": "64" }, { "name": "SC.2.Population", "index": "41", "type": "numeric", "distinct": "570", "missing": "13", "min": "1", "max": "437085", "mean": "57614", "stdev": "66639" }, { "name": "SC.3.Name", "index": "42", "type": "string", "distinct": "128", "missing": "66" }, { "name": "SC.3.Population", "index": "43", "type": "numeric", "distinct": "562", "missing": "13", "min": "1", "max": "239230", "mean": "26567", "stdev": "29130" }, { "name": "Religeon.1.Name", "index": "44", "type": "string", "distinct": "6", "missing": "53" }, { "name": "Religeon.1.Population", "index": "45", "type": "numeric", "distinct": "590", "missing": "0", "min": "19477", "max": "8224779", "mean": "1430416", "stdev": "1085844" }, { "name": "Religeon.2.Name", "index": "46", "type": "string", "distinct": "6", "missing": "53" }, { "name": "Religeon.2.Population", "index": "47", "type": "numeric", "distinct": "590", "missing": "0", "min": "531", "max": "2295967", "mean": "248915", "stdev": "315226" }, { "name": "Religeon.3.Name", "index": "48", "type": "string", "distinct": "8", "missing": "53" }, { "name": "Religeon.3.Population", "index": "49", "type": "numeric", "distinct": "586", "missing": "0", "min": "90", "max": "488697", "mean": "39601", "stdev": "70961" }, { "name": "ST.1.Name", "index": "50", "type": "string", "distinct": "118", "missing": "0" }, { "name": "ST.1.Population", "index": "51", "type": "string", "distinct": "527", "missing": "51" }, { "name": "ST.2.Name", "index": "52", "type": "string", "distinct": "154", "missing": "50" }, { "name": "ST.2.Population", "index": "53", "type": "string", "distinct": "511", "missing": "53" }, { "name": "ST.3.Name", "index": "54", "type": "string", "distinct": "170", "missing": "50" }, { "name": "ST.3.Population", "index": "55", "type": "string", "distinct": "475", "missing": "52" }, { "name": "Imp.Town.1.Name", "index": "56", "type": "string", "distinct": "528", "missing": "21" }, { "name": "Imp.Town.1.Population", "index": "57", "type": "numeric", "distinct": "576", "missing": "14", "min": "996", "max": "4572876", "mean": "248160", "stdev": "488774" }, { "name": "Imp.Town.2.Name", "index": "58", "type": "string", "distinct": "480", "missing": "97" }, { "name": "Imp.Town.2.Population", "index": "59", "type": "numeric", "distinct": "526", "missing": "64", "min": "482", "max": "2433835", "mean": "68727", "stdev": "140433" }, { "name": "Imp.Town.3.Name", "index": "60", "type": "string", "distinct": "432", "missing": "142" }, { "name": "Imp.Town.3.Population", "index": "61", "type": "numeric", "distinct": "480", "missing": "108", "min": "605", "max": "704002", "mean": "41224", "stdev": "52627" }, { "name": "Total.Inhabited.Villages", "index": "62", "type": "numeric", "distinct": "521", "missing": "12", "min": "2", "max": "10548", "mean": "1016", "stdev": "817" }, { "name": "Drinking.water.facilities", "index": "63", "type": "numeric", "distinct": "515", "missing": "12", "min": "2", "max": "10475", "mean": "1010", "stdev": "810" }, { "name": "Safe.Drinking.water", "index": "64", "type": "numeric", "distinct": "522", "missing": "12", "min": "1", "max": "10455", "mean": "992", "stdev": "801" }, { "name": "Electricity..Power.Supply.", "index": "65", "type": "numeric", "distinct": "480", "missing": "12", "min": "2", "max": "4835", "mean": "775", "stdev": "611" }, { "name": "Electricity..domestic.", "index": "66", "type": "string", "distinct": "375", "missing": "12" }, { "name": "Electricity..Agriculture.", "index": "67", "type": "string", "distinct": "95", "missing": "208" }, { "name": "Primary.school", "index": "68", "type": "numeric", "distinct": "487", "missing": "12", "min": "1", "max": "6133", "mean": "797", "stdev": "555" }, { "name": "Middle.schools", "index": "69", "type": "string", "distinct": "361", "missing": "12" }, { "name": "Secondary.Sr.Secondary.schools", "index": "70", "type": "string", "distinct": "286", "missing": "12" }, { "name": "College", "index": "71", "type": "string", "distinct": "34", "missing": "52" }, { "name": "Medical.facility", "index": "72", "type": "string", "distinct": "412", "missing": "12" }, { "name": "Primary.Health.Centre", "index": "73", "type": "string", "distinct": "98", "missing": "12" }, { "name": "Primary.Health.Sub.Centre", "index": "74", "type": "string", "distinct": "270", "missing": "12" }, { "name": "Post..telegraph.and.telephone.facility", "index": "75", "type": "numeric", "distinct": "422", "missing": "12", "min": "2", "max": "2346", "mean": "420", "stdev": "332" }, { "name": "Bus.services", "index": "76", "type": "string", "distinct": "401", "missing": "12" }, { "name": "Paved.approach.road", "index": "77", "type": "string", "distinct": "458", "missing": "12" }, { "name": "Mud.approach.road", "index": "78", "type": "string", "distinct": "469", "missing": "12" }, { "name": "Permanent.House", "index": "79", "type": "numeric", "distinct": "437", "missing": "0", "min": "2", "max": "99", "mean": "48", "stdev": "25" }, { "name": "Semi.permanent.House", "index": "80", "type": "numeric", "distinct": "413", "missing": "0", "min": "1", "max": "95", "mean": "33", "stdev": "22" }, { "name": "Temporary.House", "index": "81", "type": "numeric", "distinct": "325", "missing": "0", "min": "0", "max": "86", "mean": "19", "stdev": "21" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }