{ "data_id": "43079", "name": "Code_Smells_Data_Class", "exact_name": "Code_Smells_Data_Class", "version": 1, "version_label": "v1", "description": "This dataset combines records from the MLCQ dataset with metrics extracted using the PMD Tool and the Understand tool, to determine whether a file contains code smells. Please note that the records are on (sub)class level. Classification task, the default class (severity) should be binarized with a static threshold (preferably between 0.5 and 2.5). Please carefully read the publication to understand how to use this dataset.", "format": "arff", "uploader": "Jan van Rijn", "uploader_id": 1, "visibility": "public", "creator": "\"Chitsutha Soomlek, Jan N. van Rijn and Marcello M. Bonsangue\"", "contributor": "\"MLCQ Team, PMD Team, Understand Team\"", "date": "2021-08-10 17:24:35", "update_comment": null, "last_update": "2021-08-10 17:24:35", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22045960\/dataset", "default_target_attribute": "severity", "row_id_attribute": null, "ignore_attribute": "\"repository\",\"package\",\"filename\",\"code_name\",\"commit_hash\",\"smell\",\"class_name\"", "runs": 0, "suggest": { "input": [ "Code_Smells_Data_Class", "This dataset combines records from the MLCQ dataset with metrics extracted using the PMD Tool and the Understand tool, to determine whether a file contains code smells. Please note that the records are on (sub)class level. Classification task, the default class (severity) should be binarized with a static threshold (preferably between 0.5 and 2.5). Please carefully read the publication to understand how to use this dataset. " ], "weight": 5 }, "qualities": { "NumberOfInstances": 86467, "NumberOfFeatures": 67, "NumberOfClasses": 0, "NumberOfMissingValues": 2852906, "NumberOfInstancesWithMissingValues": 86467, "NumberOfNumericFeatures": 67, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0007748620861137775, "PercentageOfNumericFeatures": 100, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 100, "AutoCorrelation": 0.9900978419263063, "PercentageOfMissingValues": 49.2450143605817 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Statistics" } ], "features": [ { "name": "severity", "index": "6", "type": "numeric", "distinct": "26", "missing": "0", "target": "1", "min": "0", "max": "3", "mean": "0", "stdev": "0" }, { "name": "repository", "index": "0", "type": "string", "distinct": "431", "missing": "0", "ignore": "1" }, { "name": "package", "index": "1", "type": "string", "distinct": "2078", "missing": "0", "ignore": "1" }, { "name": "filename", "index": "2", "type": "string", "distinct": "2252", "missing": "0", "ignore": "1" }, { "name": "code_name", "index": "3", "type": "string", "distinct": "2329", "missing": "0", "ignore": "1" }, { "name": "commit_hash", "index": "4", "type": "string", "distinct": "431", "missing": "0", "ignore": "1" }, { "name": "smell", "index": "5", "type": "string", "distinct": "1", "missing": "0", "ignore": "1" }, { "name": "AvgCyclomatic", "index": "7", "type": "numeric", "distinct": "21", "missing": "33950", "min": "0", "max": "44", "mean": "2", "stdev": "2" }, { "name": "AvgCyclomaticModified", "index": "8", "type": "numeric", "distinct": "21", "missing": "33950", "min": "0", "max": "35", "mean": "2", "stdev": "2" }, { "name": "AvgCyclomaticStrict", "index": "9", "type": "numeric", "distinct": "24", "missing": "33950", "min": "0", "max": "52", "mean": "2", "stdev": "2" }, { "name": "AvgEssential", "index": "10", "type": "numeric", "distinct": "13", "missing": "33950", "min": "0", "max": "22", "mean": "1", "stdev": "0" }, { "name": "AvgLine", "index": "11", "type": "numeric", "distinct": "74", "missing": "33950", "min": "0", "max": "394", "mean": "12", "stdev": "12" }, { "name": "AvgLineBlank", "index": "12", "type": "numeric", "distinct": "20", "missing": "33950", "min": "0", "max": "54", "mean": "0", "stdev": "1" }, { "name": "AvgLineCode", "index": "13", "type": "numeric", "distinct": "64", "missing": "33950", "min": "0", "max": "290", "mean": "12", "stdev": "12" }, { "name": "AvgLineComment", "index": "14", "type": "numeric", "distinct": "29", "missing": "33950", "min": "0", "max": "123", "mean": "0", "stdev": "1" }, { "name": "CountClassBase", "index": "15", "type": "numeric", "distinct": "9", "missing": "33950", "min": "0", "max": "8", "mean": "1", "stdev": "0" }, { "name": "CountClassCoupled", "index": "16", "type": "numeric", "distinct": "77", "missing": "33950", "min": "0", "max": "166", "mean": "5", "stdev": "6" }, { "name": "CountClassCoupledModified", "index": "17", "type": "numeric", "distinct": "77", "missing": "33950", "min": "0", "max": "166", "mean": "5", "stdev": "6" }, { "name": "CountClassDerived", "index": "18", "type": "numeric", "distinct": "29", "missing": "33950", "min": "0", "max": "134", "mean": "0", "stdev": "1" }, { "name": "CountDeclClass", "index": "19", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountDeclClassMethod", "index": "20", "type": "numeric", "distinct": "32", "missing": "33950", "min": "0", "max": "57", "mean": "0", "stdev": "1" }, { "name": "CountDeclClassVariable", "index": "21", "type": "numeric", "distinct": "26", "missing": "33950", "min": "0", "max": "372", "mean": "0", "stdev": "2" }, { "name": "CountDeclExecutableUnit", "index": "22", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountDeclFile", "index": "23", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountDeclFunction", "index": "24", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountDeclInstanceMethod", "index": "25", "type": "numeric", "distinct": "64", "missing": "33950", "min": "0", "max": "227", "mean": "4", "stdev": "8" }, { "name": "CountDeclInstanceVariable", "index": "26", "type": "numeric", "distinct": "41", "missing": "33950", "min": "0", "max": "113", "mean": "1", "stdev": "2" }, { "name": "CountDeclMethod", "index": "27", "type": "numeric", "distinct": "66", "missing": "33950", "min": "0", "max": "229", "mean": "4", "stdev": "8" }, { "name": "CountDeclMethodAll", "index": "28", "type": "numeric", "distinct": "139", "missing": "33950", "min": "0", "max": "729", "mean": "5", "stdev": "14" }, { "name": "CountDeclMethodDefault", "index": "29", "type": "numeric", "distinct": "27", "missing": "33950", "min": "0", "max": "122", "mean": "0", "stdev": "1" }, { "name": "CountDeclMethodPrivate", "index": "30", "type": "numeric", "distinct": "29", "missing": "33950", "min": "0", "max": "39", "mean": "0", "stdev": "1" }, { "name": "CountDeclMethodProtected", "index": "31", "type": "numeric", "distinct": "21", "missing": "33950", "min": "0", "max": "22", "mean": "0", "stdev": "1" }, { "name": "CountDeclMethodPublic", "index": "32", "type": "numeric", "distinct": "58", "missing": "33950", "min": "0", "max": "227", "mean": "3", "stdev": "8" }, { "name": "CountInput", "index": "33", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountLine", "index": "34", "type": "numeric", "distinct": "428", "missing": "33950", "min": "1", "max": "3944", "mean": "53", "stdev": "135" }, { "name": "CountLineBlank", "index": "35", "type": "numeric", "distinct": "122", "missing": "33950", "min": "0", "max": "568", "mean": "4", "stdev": "13" }, { "name": "CountLineCode", "index": "36", "type": "numeric", "distinct": "338", "missing": "33950", "min": "1", "max": "3365", "mean": "45", "stdev": "103" }, { "name": "CountLineCodeDecl", "index": "37", "type": "numeric", "distinct": "159", "missing": "33950", "min": "1", "max": "616", "mean": "9", "stdev": "25" }, { "name": "CountLineCodeExe", "index": "38", "type": "numeric", "distinct": "251", "missing": "33950", "min": "0", "max": "2323", "mean": "28", "stdev": "67" }, { "name": "CountLineComment", "index": "39", "type": "numeric", "distinct": "188", "missing": "33950", "min": "0", "max": "1868", "mean": "4", "stdev": "30" }, { "name": "CountOutput", "index": "40", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountPath", "index": "41", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountPathLog", "index": "42", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CountSemicolon", "index": "43", "type": "numeric", "distinct": "217", "missing": "33950", "min": "0", "max": "1447", "mean": "21", "stdev": "44" }, { "name": "CountStmt", "index": "44", "type": "numeric", "distinct": "268", "missing": "33950", "min": "1", "max": "2058", "mean": "31", "stdev": "64" }, { "name": "CountStmtDecl", "index": "45", "type": "numeric", "distinct": "140", "missing": "33950", "min": "1", "max": "511", "mean": "9", "stdev": "19" }, { "name": "CountStmtExe", "index": "46", "type": "numeric", "distinct": "223", "missing": "33950", "min": "0", "max": "1745", "mean": "23", "stdev": "51" }, { "name": "Cyclomatic", "index": "47", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CyclomaticModified", "index": "48", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "CyclomaticStrict", "index": "49", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Essential", "index": "50", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Knots", "index": "51", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "MaxCyclomatic", "index": "52", "type": "numeric", "distinct": "48", "missing": "33950", "min": "0", "max": "144", "mean": "4", "stdev": "4" }, { "name": "MaxCyclomaticModified", "index": "53", "type": "numeric", "distinct": "47", "missing": "33950", "min": "0", "max": "100", "mean": "3", "stdev": "3" }, { "name": "MaxCyclomaticStrict", "index": "54", "type": "numeric", "distinct": "54", "missing": "33950", "min": "0", "max": "199", "mean": "4", "stdev": "5" }, { "name": "MaxEssential", "index": "55", "type": "numeric", "distinct": "28", "missing": "33950", "min": "0", "max": "66", "mean": "2", "stdev": "1" }, { "name": "MaxEssentialKnots", "index": "56", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "MaxInheritanceTree", "index": "57", "type": "numeric", "distinct": "11", "missing": "33950", "min": "0", "max": "10", "mean": "2", "stdev": "1" }, { "name": "MaxNesting", "index": "58", "type": "numeric", "distinct": "10", "missing": "33950", "min": "0", "max": "13", "mean": "1", "stdev": "1" }, { "name": "MinEssentialKnots", "index": "59", "type": "numeric", "distinct": "0", "missing": "86467", "min": "2147483647", "max": "0", "mean": "0", "stdev": "0" }, { "name": "PercentLackOfCohesion", "index": "60", "type": "numeric", "distinct": "83", "missing": "33950", "min": "0", "max": "100", "mean": "8", "stdev": "19" }, { "name": "PercentLackOfCohesionModified", "index": "61", "type": "numeric", "distinct": "96", "missing": "33950", "min": "0", "max": "100", "mean": "7", "stdev": "18" }, { "name": "RatioCommentToCode", "index": "62", "type": "numeric", "distinct": "284", "missing": "33950", "min": "0", "max": "12", "mean": "0", "stdev": "0" }, { "name": "SumCyclomatic", "index": "63", "type": "numeric", "distinct": "125", "missing": "33950", "min": "0", "max": "507", "mean": "8", "stdev": "19" }, { "name": "SumCyclomaticModified", "index": "64", "type": "numeric", "distinct": "123", "missing": "33950", "min": "0", "max": "507", "mean": "8", "stdev": "17" }, { "name": "SumCyclomaticStrict", "index": "65", "type": "numeric", "distinct": "133", "missing": "33950", "min": "0", "max": "582", "mean": "8", "stdev": "20" }, { "name": "SumEssential", "index": "66", "type": "numeric", "distinct": "84", "missing": "33950", "min": "0", "max": "251", "mean": "4", "stdev": "10" }, { "name": "WOC", "index": "67", "type": "numeric", "distinct": "321", "missing": "920", "min": "0", "max": "100", "mean": "89", "stdev": "16" }, { "name": "NOPA", "index": "68", "type": "numeric", "distinct": "35", "missing": "194", "min": "0", "max": "760", "mean": "0", "stdev": "4" }, { "name": "NOAM", "index": "69", "type": "numeric", "distinct": "43", "missing": "194", "min": "0", "max": "66", "mean": "1", "stdev": "2" }, { "name": "WMC", "index": "70", "type": "numeric", "distinct": "276", "missing": "194", "min": "0", "max": "2811", "mean": "20", "stdev": "50" }, { "name": "TCC", "index": "71", "type": "numeric", "distinct": "901", "missing": "26455", "min": "0", "max": "100", "mean": "2", "stdev": "11" }, { "name": "ATFD", "index": "72", "type": "numeric", "distinct": "235", "missing": "194", "min": "0", "max": "2814", "mean": "12", "stdev": "41" }, { "name": "class_name", "index": "73", "type": "string", "distinct": "16908", "missing": "194", "ignore": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }