{ "data_id": "43838", "name": "Municipal-Debt-Risk-Analysis", "exact_name": "Municipal-Debt-Risk-Analysis", "version": 1, "version_label": "v1.0", "description": "Context\nThis data has been extracted from the billing systems of 8 Municipalities in South Africa over a 2 year period and summarised according to their total amount billed versus the total amount paid. For each account there is an indicator of whether that account resulted in a Bad Debt.\nThis is a Classification exercise with the aim of finding out whether it is feasible to determine the probability of an account becoming a Bad Debt so that it will be possible to forecast the number (and value) of accounts that are at risk of developing into a Bad Debt.\nContent\nAccCategoryID: (Account Category ID) The numeric link in the database to the Account Category\nAccCategory: (Account Category) A classification of the type of account\nAccCategoryAbbr: (Account Category Abbreviation) An abbreviation of the classification of the type of account - to be used for One-hot encoding\nPropertyValue: (Property Value) The market value of the property\nPropertySize: (Property Size) The size of the property in square metres\nTotalBilling: (Total Billing) The total amount billed to the account for all services\nAverageBilling: (Average Billing) The average amount billed to the account for all services\nTotalReceipting: (Total Receipting) The total amount receipted to the account for all services\nAverageReceipting: (Average Receipting) The average amount receipted to the account for all services\nTotalDebt: (Total Debt) The Total Debt that is at 90 days or more\nTotalWriteOff: (Total Write Off) The Total amount of debt that has been written off\nCollectionRatio: (Collection Ratio) The ratio between the Total Receipting and Total Billing (ie. Total Receipting\/Total Billing)\nDebtBillingRatio: (Billing Debt Ratio) The ratio between the Total Debt and Total Billing (ie. (Total Debt + Total Write Off)\/Total Billing)\nTotalElectricityBill: (Total Electricity Bill) The total amount billed for electricity. This field was put in place because it is used as a means to recover debt - ie. If an amount is outstanding for any service the municipality has the right to cut a consumer's electricity connection.\nHasIDNo: (Has ID No.) The consumer has an ID number. This is similar to a Social Security number in the US and can be useful in legal proceedings. A consumer without any ID No. details is a lot harder to collect debt from. In addition, this field denotes that the account is held by a person and not a business. However, it is not very reliable as it's often not captured properly or at all.\nBadDebtIndic: (Bad Debt Indicator) 1 = Is considered to be a Bad Debt, 0 = Not considered to be a Bad Debt\nInspiration\nI welcome any feedback on the dataset as well as my methodology in classifying and modelling this dataset. The kernel that I have run against this dataset is my first and I am now working on a second attempt with different parameters. Any advice, criticisms etc - will be much appreciated", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 15:33:08", "update_comment": null, "last_update": "2022-03-24 15:33:08", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102663\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Municipal-Debt-Risk-Analysis", "Context This data has been extracted from the billing systems of 8 Municipalities in South Africa over a 2 year period and summarised according to their total amount billed versus the total amount paid. For each account there is an indicator of whether that account resulted in a Bad Debt. This is a Classification exercise with the aim of finding out whether it is feasible to determine the probability of an account becoming a Bad Debt so that it will be possible to forecast the number (and value) " ], "weight": 5 }, "qualities": { "NumberOfInstances": 138509, "NumberOfFeatures": 16, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 14, "NumberOfSymbolicFeatures": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0, "Dimensionality": 0.00011551595925174537, "PercentageOfNumericFeatures": 87.5, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Statistics" } ], "features": [ { "name": "accountcategoryid", "index": "0", "type": "numeric", "distinct": "12", "missing": "0", "min": "1", "max": "13", "mean": "2", "stdev": "3" }, { "name": "accountcategory", "index": "1", "type": "string", "distinct": "12", "missing": "0" }, { "name": "acccatabbr", "index": "2", "type": "string", "distinct": "12", "missing": "0" }, { "name": "propertyvalue", "index": "3", "type": "numeric", "distinct": "11951", "missing": "0", "min": "0", "max": "421860000", "mean": "684309", "stdev": "2805861" }, { "name": "propertysize", "index": "4", "type": "numeric", "distinct": "17424", "missing": "0", "min": "0", "max": "444637539", "mean": "733835", "stdev": "5771966" }, { "name": "totalbilling", "index": "5", "type": "numeric", "distinct": "33209", "missing": "0", "min": "-4232630", "max": "26133617", "mean": "10788", "stdev": "129836" }, { "name": "avgbilling", "index": "6", "type": "numeric", "distinct": "6467", "missing": "0", "min": "-79384", "max": "805044", "mean": "555", "stdev": "5887" }, { "name": "totalreceipting", "index": "7", "type": "numeric", "distinct": "27765", "missing": "0", "min": "-219260", "max": "24371134", "mean": "9474", "stdev": "115568" }, { "name": "avgreceipting", "index": "8", "type": "numeric", "distinct": "10536", "missing": "0", "min": "-219260", "max": "6828297", "mean": "1939", "stdev": "32340" }, { "name": "total90debt", "index": "9", "type": "numeric", "distinct": "25001", "missing": "0", "min": "0", "max": "13091344", "mean": "9677", "stdev": "69158" }, { "name": "totalwriteoff", "index": "10", "type": "numeric", "distinct": "6129", "missing": "0", "min": "0", "max": "4306605", "mean": "573", "stdev": "12478" }, { "name": "collectionratio", "index": "11", "type": "numeric", "distinct": "1967", "missing": "0", "min": "-115000", "max": "1701912", "mean": "14", "stdev": "4746" }, { "name": "debtbillingratio", "index": "12", "type": "numeric", "distinct": "7467", "missing": "0", "min": "-436506", "max": "1836304", "mean": "17", "stdev": "5432" }, { "name": "totalelecbill", "index": "13", "type": "numeric", "distinct": "13717", "missing": "0", "min": "-295770", "max": "22711689", "mean": "4221", "stdev": "102252" }, { "name": "hasidno", "index": "14", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "baddebt", "index": "15", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }