{ "data_id": "45077", "name": "qsar", "exact_name": "qsar", "version": 1, "version_label": "1", "description": "The QSAR biodegradation dataset was built in the Milano Chemometrics and QSAR Research Group. The research leading to these results has received funding from the European Community's Seventh Framework Programme [FP7\/2007-2013] under Grant Agreement n. 238701 of Marie Curie ITN Environmental Chemoinformatics (ECO) project. The data have been used to develop QSAR (Quantitative Structure Activity Relationships) models for the study of the relationships between chemical structure and biodegradation of molecules. Biodegradation experimental values of 1055 chemicals were collected from the webpage of the National Institute of Technology and Evaluation of Japan (NITE). Classification models were developed in order to discriminate ready (356) and not ready (699) biodegradable molecules by means of three different modelling methods: k Nearest Neighbours, Partial Least Squares Discriminant Analysis and Support Vector Machines. Details on attributes (molecular descriptors) selected in each model can be found in the quoted reference: Mansouri, K., Ringsted, T., Ballabio, D., Todeschini, R., Consonni, V. (2013). Quantitative Structure - Activity Relationship models for ready biodegradability of chemicals. Journal of Chemical Information and Modeling, 53, 867-878. Source: https:\/\/archive.ics.uci.edu\/ml\/datasets\/QSAR+biodegradation",
"format": "arff", "uploader": "Young Lee", "uploader_id": 31892, "visibility": "public", "creator": "Yoontae Hwang, Youngbin Lee, Yongjae Lee", "contributor": null, "date": "2023-01-27 11:44:22", "update_comment": null, "last_update": "2023-01-27 11:44:22", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22112035\/dataset", "default_target_attribute": "class", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "qsar", "The QSAR biodegradation dataset was built in the Milano Chemometrics and QSAR Research Group. The research leading to these results has received funding from the European Community's Seventh Framework Programme [FP7\/2007-2013] under Grant Agreement n. 238701 of Marie Curie ITN Environmental Chemoinformatics (ECO) project. The data have been used to develop QSAR (Quantitative Structure Activity Relationships) models for the study of the relationships between chemical structure and biodegradation of " ], "weight": 5 }, "qualities": { "NumberOfInstances": 1055, "NumberOfFeatures": 41, "NumberOfClasses": 2, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 30, "NumberOfSymbolicFeatures": 10, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": 1, "PercentageOfMissingValues": 0, "PercentageOfNumericFeatures": 73.17073170731707, "Dimensionality": 0.03886255924170616, "PercentageOfSymbolicFeatures": 24.390243902439025, "MajorityClassPercentage": 66.25592417061611, "MajorityClassSize": 699, "MinorityClassPercentage": 33.74407582938389, "MinorityClassSize": 356, "NumberOfBinaryFeatures": 3, "PercentageOfBinaryFeatures": 7.317073170731707 }, "tags": [ { "tag": "Geography", "uploader": "38960" }, { "tag": "Health", "uploader": "38960" }, { "tag": "study_340", "uploader": "0" 
}, { "tag": "study_341", "uploader": "0" } ], "features": [ { "name": "class", "index": "40", "type": "string", "distinct": "2", "missing": "0", "target": "1" }, { "name": "0", "index": "0", "type": "numeric", "distinct": "440", "missing": "0", "min": "2", "max": "6", "mean": "5", "stdev": "1" }, { "name": "1", "index": "1", "type": "numeric", "distinct": "1022", "missing": "0", "min": "1", "max": "9", "mean": "3", "stdev": "1" }, { "name": "7", "index": "2", "type": "numeric", "distinct": "188", "missing": "0", "min": "0", "max": "61", "mean": "37", "stdev": "9" }, { "name": "11", "index": "3", "type": "numeric", "distinct": "384", "missing": "0", "min": "-5", "max": "5", "mean": "0", "stdev": "1" }, { "name": "12", "index": "4", "type": "numeric", "distinct": "756", "missing": "0", "min": "2", "max": "6", "mean": "3", "stdev": "1" }, { "name": "13", "index": "5", "type": "numeric", "distinct": "373", "missing": "0", "min": "0", "max": "4", "mean": "1", "stdev": "1" }, { "name": "14", "index": "6", "type": "numeric", "distinct": "510", "missing": "0", "min": "4", "max": "13", "mean": "10", "stdev": "1" }, { "name": "16", "index": "7", "type": "numeric", "distinct": "167", "missing": "0", "min": "1", "max": "1", "mean": "1", "stdev": "0" }, { "name": "17", "index": "8", "type": "numeric", "distinct": "125", "missing": "0", "min": "1", "max": "1", "mean": "1", "stdev": "0" }, { "name": "21", "index": "9", "type": "numeric", "distinct": "352", "missing": "0", "min": "1", "max": "2", "mean": "1", "stdev": "0" }, { "name": "26", "index": "10", "type": "numeric", "distinct": "329", "missing": "0", "min": "1", "max": "3", "mean": "2", "stdev": "0" }, { "name": "27", "index": "11", "type": "numeric", "distinct": "205", "missing": "0", "min": "-1", "max": "1", "mean": "0", "stdev": "0" }, { "name": "29", "index": "12", "type": "numeric", "distinct": "470", "missing": "0", "min": "0", "max": "71", "mean": "9", "stdev": "12" }, { "name": "30", "index": "13", "type": 
"numeric", "distinct": "553", "missing": "0", "min": "0", "max": "18", "mean": "3", "stdev": "2" }, { "name": "35", "index": "14", "type": "numeric", "distinct": "705", "missing": "0", "min": "2", "max": "11", "mean": "4", "stdev": "1" }, { "name": "36", "index": "15", "type": "numeric", "distinct": "624", "missing": "0", "min": "1", "max": "6", "mean": "3", "stdev": "1" }, { "name": "38", "index": "16", "type": "numeric", "distinct": "862", "missing": "0", "min": "5", "max": "15", "mean": "9", "stdev": "1" }, { "name": "2", "index": "17", "type": "numeric", "distinct": "11", "missing": "0", "min": "0", "max": "12", "mean": "1", "stdev": "1" }, { "name": "4", "index": "18", "type": "numeric", "distinct": "16", "missing": "0", "min": "0", "max": "36", "mean": "1", "stdev": "2" }, { "name": "5", "index": "19", "type": "numeric", "distinct": "13", "missing": "0", "min": "0", "max": "13", "mean": "0", "stdev": "1" }, { "name": "6", "index": "20", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "18", "mean": "2", "stdev": "2" }, { "name": "8", "index": "21", "type": "numeric", "distinct": "15", "missing": "0", "min": "0", "max": "24", "mean": "1", "stdev": "2" }, { "name": "9", "index": "22", "type": "numeric", "distinct": "12", "missing": "0", "min": "0", "max": "12", "mean": "2", "stdev": "2" }, { "name": "10", "index": "23", "type": "numeric", "distinct": "21", "missing": "0", "min": "0", "max": "44", "mean": "1", "stdev": "3" }, { "name": "15", "index": "24", "type": "numeric", "distinct": "24", "missing": "0", "min": "0", "max": "40", "mean": "4", "stdev": "4" }, { "name": "31", "index": "25", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "8", "mean": "0", "stdev": "1" }, { "name": "32", "index": "26", "type": "numeric", "distinct": "11", "missing": "0", "min": "0", "max": "12", "mean": "1", "stdev": "2" }, { "name": "33", "index": "27", "type": "numeric", "distinct": "16", "missing": "0", "min": "0", "max": "18", 
"mean": "1", "stdev": "2" }, { "name": "37", "index": "28", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "8", "mean": "1", "stdev": "1" }, { "name": "40", "index": "29", "type": "numeric", "distinct": "17", "missing": "0", "min": "0", "max": "27", "mean": "1", "stdev": "2" }, { "name": "39", "index": "30", "type": "nominal", "distinct": "5", "missing": "0", "distr": [] }, { "name": "20", "index": "31", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "28", "index": "32", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "23", "index": "33", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] }, { "name": "3", "index": "34", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "22", "index": "35", "type": "nominal", "distinct": "13", "missing": "0", "distr": [] }, { "name": "34", "index": "36", "type": "nominal", "distinct": "8", "missing": "0", "distr": [] }, { "name": "19", "index": "37", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "25", "index": "38", "type": "nominal", "distinct": "4", "missing": "0", "distr": [] }, { "name": "24", "index": "39", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }