{ "data_id": "40981", "name": "Australian", "exact_name": "Australian", "version": 4, "version_label": "4", "description": "**Author**: Confidential. Donated by Ross Quinlan \n**Source**: [LibSVM](https:\/\/www.csie.ntu.edu.tw\/~cjlin\/libsvmtools\/datasets\/binary.html), [UCI](https:\/\/archive.ics.uci.edu\/ml\/datasets\/Statlog+(Australian+Credit+Approval)) - 1987 \n**Please cite**: [UCI](https:\/\/archive.ics.uci.edu\/ml\/citation_policy.html) \n\n**Important note:** This dataset is derived from [credit-approval](https:\/\/www.openml.org\/d\/29), even though both datasets exist individually on UCI. In this version, missing values were filled in (not clear how) and a duplicate feature was removed. \n\n**Australian Credit Approval**. This is the famous Australian Credit Approval dataset, originating from the StatLog project. It concerns credit card applications. All attribute names and values have been changed to meaningless symbols to protect the confidentiality of the data. \n\nThis dataset was retrieved 2014-11-14 from the UCI site and converted to the ARFF format.\n\n__Major changes w.r.t. version 3: dataset from UCI that matches description and data types__\n\n\n### Feature information\n\nThere are 6 numerical and 8 categorical attributes, all normalized to [-1,1]. The original formatting was as follows: \n\nA1: 0,1 CATEGORICAL (formerly: a,b) \nA2: continuous. \nA3: continuous. \nA4: 1,2,3 CATEGORICAL (formerly: p,g,gg) \nA5: 1, 2,3,4,5, 6,7,8,9,10,11,12,13,14 CATEGORICAL (formerly: ff,d,i,k,j,aa,m,c,w, e, q, r,cc, x) \nA6: 1, 2,3, 4,5,6,7,8,9 CATEGORICAL (formerly: ff,dd,j,bb,v,n,o,h,z) \nA7: continuous. \nA8: 1, 0 CATEGORICAL (formerly: t, f) \nA9: 1, 0 CATEGORICAL (formerly: t, f) \nA10: continuous. \nA11: 1, 0 CATEGORICAL (formerly t, f) \nA12: 1, 2, 3 CATEGORICAL (formerly: s, g, p) \nA13: continuous. \nA14: continuous. \nA15: 1,2 class attribute (formerly: +,-) \n\n### Relevant Papers\n\nRoss Quinlan. \"Simplifying decision trees\", Int J Man-Machine Studies 27, Dec 1987, pp. 221-234. \n\nRoss Quinlan. \"C4.5: Programs for Machine Learning\", Morgan Kaufmann, Oct 1992", "format": "ARFF", "uploader": "Jann Goschenhofer", "uploader_id": 4265, "visibility": "public", "creator": null, "contributor": null, "date": "2017-12-04 22:15:38", "update_comment": null, "last_update": "2017-12-04 22:15:38", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/18151910\/phpelnJ6y", "default_target_attribute": "A15", "row_id_attribute": null, "ignore_attribute": null, "runs": 4207, "suggest": { "input": [ "Australian", "This dataset was retrieved 2014-11-14 from the UCI site and converted to the ARFF format. __Major changes w.r.t. version 3: dataset from UCI that matches description and data types__ ### Feature information There are 6 numerical and 8 categorical attributes, all normalized to [-1,1]. The original formatting was as follows: A1: 0,1 CATEGORICAL (formerly: a,b) A2: continuous. A3: continuous. A4: 1,2,3 CATEGORICAL (formerly: p,g,gg) A5: 1, 2,3,4,5, 6,7,8,9,10,11,12,13,14 CATEGORICAL (formerly: ff,d " ], "weight": 5 }, "qualities": { "NumberOfInstances": 690, "NumberOfFeatures": 15, "NumberOfClasses": 2, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 9, "MaxMutualInformation": 0.42570942667283, "MinNominalAttDistinctValues": 2, "PercentageOfNumericFeatures": 40, "Quartile3MeansOfNumericAtts": 99.18369565217391, "MaxNominalAttDistinctValues": 14, "MinSkewnessOfNumericAtts": 0.3926498153530712, "PercentageOfSymbolicFeatures": 60, "Quartile3MutualInformation": 0.144504194273545, "MaxSkewnessOfNumericAtts": 2.1255033078408267, "MinStdDevOfNumericAtts": 4.034412666031165, "Quartile1AttributeEntropy": 0.8387161319968748, "Quartile3SkewnessOfNumericAtts": 1.3355273342189935, "MaxStdDevOfNumericAtts": 92.93205207446056, "MinorityClassPercentage": 44.492753623188406, "Quartile1KurtosisOfNumericAtts": -0.8847596859511218, "Quartile3StdDevOfNumericAtts": 77.75429949095053, "MeanAttributeEntropy": 1.309926400235449, "MinorityClassSize": 307, "Quartile1MeansOfNumericAtts": 28.685507246376808, "StdvNominalAttDistinctValues": 4.14661843487491, "MeanKurtosisOfNumericAtts": 0.19032997589682876, "NumberOfBinaryFeatures": 5, "Quartile1MutualInformation": 0.003050061391925, "MeanMeansOfNumericAtts": 64.2927536231884, "Quartile1SkewnessOfNumericAtts": 0.4586582920286034, "MeanMutualInformation": 0.09773048726553625, "Quartile1StdDevOfNumericAtts": 28.2552230012924, "AutoCorrelation": 0.5195936139332366, "MeanNoiseToSignalRatio": 12.403457169678743, "Quartile2AttributeEntropy": 0.9898462292119665, "ClassEntropy": 0.9912308989033523, "MeanNominalAttDistinctValues": 4.222222222222222, "Quartile2KurtosisOfNumericAtts": -0.5375502080957013, "Dimensionality": 0.021739130434782608, "MeanSkewnessOfNumericAtts": 0.9097524103570412, "Quartile2MeansOfNumericAtts": 56.970289855072465, "EquivalentNumberOfAtts": 10.14249418618115, "MeanStdDevOfNumericAtts": 51.82219374450552, "Quartile2MutualInformation": 0.03989592733091, "MajorityClassPercentage": 55.507246376811594, "MinAttributeEntropy": 0.5010395545266964, "Quartile2SkewnessOfNumericAtts": 0.6937491058414267, "MajorityClassSize": 383, "MinKurtosisOfNumericAtts": -0.9269912725749383, "PercentageOfBinaryFeatures": 33.33333333333333, "Quartile2StdDevOfNumericAtts": 52.47141132519069, "MaxAttributeEntropy": 3.496440211804627, "MinMeansOfNumericAtts": 3.2985507246376833, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": 1.5856077791548016, "MaxKurtosisOfNumericAtts": 4.2653473291329025, "MinMutualInformation": 0.00013921104242, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 0.878391861351421, "MaxMeansOfNumericAtts": 148.68695652173912 }, "tags": [ { "tag": "derived", "uploader": "2" }, { "tag": "Economics", "uploader": "38960" }, { "tag": "OpenML100", "uploader": "2" }, { "tag": "study_135", "uploader": "5824" }, { "tag": "study_144", "uploader": "5824" }, { "tag": "study_218", "uploader": "869" }, { "tag": "study_98", "uploader": "1935" }, { "tag": "study_271", "uploader": "0" }, { "tag": "study_240", "uploader": "0" }, { "tag": "study_379", "uploader": "0" }, { "tag": "study_446", "uploader": "0" }, { "tag": "study_447", "uploader": "0" }, { "tag": "study_448", "uploader": "0" }, { "tag": "study_449", "uploader": "0" } ], "features": [ { "name": "A15", "index": "14", "type": "nominal", "distinct": "2", "missing": "0", "target": "1", "distr": [ [ "0", "1" ], [ [ "383", "0" ], [ "0", "307" ] ] ] }, { "name": "A1", "index": "0", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "121", "101" ], [ "262", "206" ] ] ] }, { "name": "A2", "index": "1", "type": "numeric", "distinct": "350", "missing": "0", "min": "1", "max": "350", "mean": "149", "stdev": "93" }, { "name": "A3", "index": "2", "type": "numeric", "distinct": "215", "missing": "0", "min": "1", "max": "215", "mean": "83", "stdev": "59" }, { "name": "A4", "index": "3", "type": "nominal", "distinct": "3", "missing": "0", "distr": [ [ "1", "2", "3" ], [ [ "118", "45" ], [ "265", "260" ], [ "0", "2" ] ] ] }, { "name": "A5", "index": "4", "type": "nominal", "distinct": "14", "missing": "0", "distr": [ [ "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14" ], [ [ "46", "7" ], [ "23", "7" ], [ "45", "14" ], [ "37", "14" ], [ "7", "3" ], [ "35", "19" ], [ "22", "16" ], [ "80", "66" ], [ "31", "33" ], [ "11", "14" ], [ "27", "51" ], [ "1", "2" ], [ "12", "29" ], [ "6", "32" ] ] ] }, { "name": "A6", "index": "5", "type": "nominal", "distinct": "8", "missing": "0", "distr": [ [ "1", "2", "3", "4", "5", "7", "8", "9" ], [ [ "49", "8" ], [ "4", "2" ], [ "5", "3" ], [ "235", "173" ], [ "34", "25" ], [ "3", "3" ], [ "51", "87" ], [ "2", "6" ] ] ] }, { "name": "A7", "index": "6", "type": "numeric", "distinct": "132", "missing": "0", "min": "1", "max": "132", "mean": "37", "stdev": "36" }, { "name": "A8", "index": "7", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "306", "23" ], [ "77", "284" ] ] ] }, { "name": "A9", "index": "8", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "297", "98" ], [ "86", "209" ] ] ] }, { "name": "A10", "index": "9", "type": "numeric", "distinct": "23", "missing": "0", "min": "1", "max": "23", "mean": "3", "stdev": "4" }, { "name": "A11", "index": "10", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "0", "1" ], [ [ "213", "161" ], [ "170", "146" ] ] ] }, { "name": "A12", "index": "11", "type": "nominal", "distinct": "3", "missing": "0", "distr": [ [ "1", "2", "3" ], [ [ "42", "15" ], [ "338", "287" ], [ "3", "5" ] ] ] }, { "name": "A13", "index": "12", "type": "numeric", "distinct": "171", "missing": "0", "min": "1", "max": "171", "mean": "59", "stdev": "46" }, { "name": "A14", "index": "13", "type": "numeric", "distinct": "240", "missing": "0", "min": "1", "max": "240", "mean": "55", "stdev": "73" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 1, "nr_of_downloads": 10, "total_downloads": 13, "reach": 11, "reuse": 16, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 16 }