{ "data_id": "44963", "name": "physiochemical_protein", "exact_name": "physiochemical_protein", "version": 7, "version_label": null, "description": "**Data Description**\n\nThis is a data set of Physicochemical Properties of Protein Tertiary Structure. The data set is taken from CASP 5-9. There are 45730 decoys and size varying from 0 to 21 angstroms.\n\nThe goal of the dataset is to predict the size of the residue for a tertiary protein structure (a 3d protein structure). Once linked in the protein chain, an individual amino acid is called a residue. The target feature is root mean square error of the residue.\n\n**Attribute Description**\n\n1. *RMSD* - size of the residue\n2. *F1* - total surface area\n3. *F2* - non polar exposed area\n4. *F3* - fractional area of exposed non polar residue\n5. *F4* - fractional area of exposed non polar part of residue\n6. *F5* - molecular mass weighted exposed area\n7. *F6* - average deviation from standard exposed area of residue\n8. *F7* - Euclidean distance\n9. *F8* - secondary structure penalty\n10. *F9* - Spatial Distribution constraints (N,K Value)", "format": "arff", "uploader": "Sebastian Fischer", "uploader_id": 30127, "visibility": "public", "creator": null, "contributor": null, "date": "2022-12-22 16:11:56", "update_comment": null, "last_update": "2022-12-22 16:11:56", "licence": "CC BY 4.0", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22111827\/file22f167620a212.arff", "default_target_attribute": "RMSD", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "physiochemical_protein", "This is a data set of Physicochemical Properties of Protein Tertiary Structure. The data set is taken from CASP 5-9. There are 45730 decoys and size varying from 0 to 21 angstroms. The goal of the dataset is to predict the size of the residue for a tertiary protein structure (a 3d protein structure). 
Once linked in the protein chain, an individual amino acid is called a residue. The target feature is root mean square error of the residue. 1. *RMSD* - size of the residue 2. *F1* - total surface a " ], "weight": 5 }, "qualities": { "NumberOfInstances": 45730, "NumberOfFeatures": 10, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 10, "NumberOfSymbolicFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": -5.824410658444305, "PercentageOfMissingValues": 0, "Dimensionality": 0.00021867483052700635, "PercentageOfNumericFeatures": 100, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0 }, "tags": [ { "tag": "Statistics", "uploader": "38960" }, { "tag": "study_353", "uploader": "0" }, { "tag": "study_429", "uploader": "0" }, { "tag": "study_430", "uploader": "0" } ], "features": [ { "name": "RMSD", "index": "0", "type": "numeric", "distinct": "15903", "missing": "0", "target": "1", "min": "0", "max": "21", "mean": "8", "stdev": "6" }, { "name": "F1", "index": "1", "type": "numeric", "distinct": "39916", "missing": "0", "min": "2392", "max": "40035", "mean": "9872", "stdev": "4058" }, { "name": "F2", "index": "2", "type": "numeric", "distinct": "39863", "missing": "0", "min": "404", "max": "15312", "mean": "3017", "stdev": "1464" }, { "name": "F3", "index": "3", "type": "numeric", "distinct": "20089", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "F4", "index": "4", "type": "numeric", "distinct": "40374", "missing": "0", "min": "10", "max": "369", "mean": "103", "stdev": "55" }, { "name": "F5", "index": "5", "type": "numeric", "distinct": "41868", "missing": "0", "min": "319490", "max": "5472011", "mean": "1368299", "stdev": "564037" }, { "name": "F6", "index": "6", "type": 
"numeric", "distinct": "39155", "missing": "0", "min": "32", "max": "598", "mean": "146", "stdev": "70" }, { "name": "F7", "index": "7", "type": "numeric", "distinct": "39450", "missing": "0", "min": "0", "max": "105948", "mean": "3990", "stdev": "1994" }, { "name": "F8", "index": "8", "type": "numeric", "distinct": "341", "missing": "0", "min": "0", "max": "350", "mean": "70", "stdev": "56" }, { "name": "F9", "index": "9", "type": "numeric", "distinct": "37299", "missing": "0", "min": "15", "max": "55", "mean": "35", "stdev": "6" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 1, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 1 }