{ "data_id": "44970", "name": "QSAR_fish_toxicity", "exact_name": "QSAR_fish_toxicity", "version": 7, "version_label": null, "description": "**Data Description**\n\nData set containing values for 6 attributes (molecular descriptors) of 908 chemicals used to predict quantitative acute aquatic toxicity towards the fish Pimephales promelas (fathead minnow).\n\nThis dataset was used to develop quantitative regression QSAR models to predict acute aquatic toxicity towards the fish Pimephales promelas (fathead minnow) on a set of 908 chemicals. LC50 data, which is the concentration that causes death in 50% of test fish over a test duration of 96 hours, was used as model response.\n\n**Attribute Description**\n\nThe model comprised 6 molecular descriptors\n\n1. *CIC0* - information indices\n2. *SM1_Dz* - 2D matrix-based descriptors\n3. *GATS1i* - 2D autocorrelations\n4. *NdsCH* - atom-type counts\n5. *NdssC* - atom-type counts\n6. *MLOGP* - molecular properties\n7. *LC50* - quantitative response, LC50 [-LOG(mol\/L)], target feature", "format": "arff", "uploader": "Sebastian Fischer", "uploader_id": 30127, "visibility": "public", "creator": null, "contributor": null, "date": "2022-12-22 16:13:08", "update_comment": null, "last_update": "2022-12-22 16:13:08", "licence": "CC BY 4.0", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22111834\/file22f163d0ea8de.arff", "default_target_attribute": "LC50", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "QSAR_fish_toxicity", "Data set containing values for 6 attributes (molecular descriptors) of 908 chemicals used to predict quantitative acute aquatic toxicity towards the fish Pimephales promelas (fathead minnow). This dataset was used to develop quantitative regression QSAR models to predict acute aquatic toxicity towards the fish Pimephales promelas (fathead minnow) on a set of 908 chemicals. LC50 data, which is the concentration that causes death in 50% of test fish over a test duration of 96 hours, was used as mo " ], "weight": 5 }, "qualities": { "NumberOfInstances": 908, "NumberOfFeatures": 7, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 7, "NumberOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": -0.4339018743109173, "PercentageOfMissingValues": 0, "Dimensionality": 0.007709251101321586, "PercentageOfNumericFeatures": 100, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0 }, "tags": [ { "tag": "Life Science", "uploader": "38960" }, { "tag": "Machine Learning", "uploader": "38960" }, { "tag": "study_353", "uploader": "0" } ], "features": [ { "name": "LC50", "index": "6", "type": "numeric", "distinct": "827", "missing": "0", "target": "1", "min": "0", "max": "10", "mean": "4", "stdev": "1" }, { "name": "CIC0", "index": "0", "type": "numeric", "distinct": "502", "missing": "0", "min": "1", "max": "6", "mean": "3", "stdev": "1" }, { "name": "SM1_Dz", "index": "1", "type": "numeric", "distinct": "186", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "GATS1i", "index": "2", "type": "numeric", "distinct": "557", "missing": "0", "min": "0", "max": "3", "mean": "1", "stdev": "0" }, { "name": "NdsCH", "index": "3", "type": "numeric", "distinct": "5", "missing": "0", "min": "0", "max": "4", "mean": "0", "stdev": "1" }, { "name": "NdssC", "index": "4", "type": "numeric", "distinct": "7", "missing": "0", "min": "0", "max": "6", "mean": "0", "stdev": "1" }, { "name": "MLOGP", "index": "5", "type": "numeric", "distinct": "559", "missing": "0", "min": "-3", "max": "7", "mean": "2", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }