{ "data_id": "40788", "name": "Yeast-test", "exact_name": "Yeast-test", "version": 1, "version_label": null, "description": "Source:\r\n\r\nCreator and Maintainer: \r\n\r\nKenta Nakai \r\nInstitute of Molecular and Cellular Biology \r\nOsaka University \r\n1-3 Yamada-oka, Suita 565 Japan \r\nnakai '@' imcb.osaka-u.ac.jp \r\nhttp:\/\/www.imcb.osaka-u.ac.jp\/nakai\/psort.html \r\n\r\nDonor: \r\n\r\nPaul Horton (paulh '@' cs.berkeley.edu)\r\n\r\n\r\nData Set Information:\r\n\r\nPredicted Attribute: Localization site of protein. ( non-numeric ). \r\n\r\nThe references below describe a predecessor to this dataset and its development. They also give results (not cross-validated) for classification by a rule-based expert system with that version of the dataset. \r\n\r\nReference: \"Expert System for Predicting Protein Localization Sites in Gram-Negative Bacteria\", Kenta Nakai & Minoru Kanehisa, PROTEINS: Structure, Function, and Genetics 11:95-110, 1991. \r\n\r\nReference: \"A Knowledge Base for Predicting Protein Localization Sites in Eukaryotic Cells\", Kenta Nakai & Minoru Kanehisa, Genomics 14:897-911, 1992.\r\n\r\n\r\nAttribute Information:\r\n\r\n1. Sequence Name: Accession number for the SWISS-PROT database \r\n2. mcg: McGeoch's method for signal sequence recognition. \r\n3. gvh: von Heijne's method for signal sequence recognition. \r\n4. alm: Score of the ALOM membrane spanning region prediction program. \r\n5. mit: Score of discriminant analysis of the amino acid content of the N-terminal region (20 residues long) of mitochondrial and non-mitochondrial proteins. \r\n6. erl: Presence of \"HDEL\" substring (thought to act as a signal for retention in the endoplasmic reticulum lumen). Binary attribute. \r\n7. pox: Peroxisomal targeting signal in the C-terminus. \r\n8. vac: Score of discriminant analysis of the amino acid content of vacuolar and extracellular proteins. \r\n9. nuc: Score of discriminant analysis of nuclear localization signals of nuclear and non-nuclear proteins.\r\n\r\n#autoxgboost #autoweka", "format": "ARFF", "uploader": "Stefan Coors", "uploader_id": 1184, "visibility": "public", "creator": null, "contributor": null, "date": "2017-06-20 11:38:25", "update_comment": null, "last_update": "2017-06-20 11:38:25", "licence": "Public", "status": "in_preparation", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/5797227\/phpqacpD2", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Yeast-test", "Source: Creator and Maintainer: Kenta Nakai Institute of Molecular and Cellular Biology Osaka University 1-3 Yamada-oka, Suita 565 Japan nakai '@' imcb.osaka-u.ac.jp http:\/\/www.imcb.osaka-u.ac.jp\/nakai\/psort.html Donor: Paul Horton (paulh '@' cs.berkeley.edu) Data Set Information: Predicted Attribute: Localization site of protein. ( non-numeric ). The references below describe a predecessor to this dataset and its development. They also give results (not cross-validated) for classification by a " ], "weight": 5 }, "qualities": { "NumberOfInstances": 445, "NumberOfFeatures": 9, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 8, "NumberOfSymbolicFeatures": 1, "ClassEntropy": null, "MeanNoiseToSignalRatio": null, "Quartile2AttributeEntropy": null, "Dimensionality": 0.020224719101123594, "MeanNominalAttDistinctValues": 9, "Quartile2KurtosisOfNumericAtts": 4.87513214444596, "EquivalentNumberOfAtts": null, "MeanSkewnessOfNumericAtts": 3.2892921575996326, "Quartile2MeansOfNumericAtts": 0.49643820224719104, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 0.09893165329269435, "Quartile2MutualInformation": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2SkewnessOfNumericAtts": 1.1579507886585256, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": 0.4619470844258915, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 0.09882757491274476, "MaxKurtosisOfNumericAtts": 219.98191073585, "MinMeansOfNumericAtts": 0.012696629213483157, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "MaxMeansOfNumericAtts": 0.5022471910112359, "MinMutualInformation": null, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 48.04491579768744, "MaxMutualInformation": null, "MinNominalAttDistinctValues": 9, "PercentageOfNumericFeatures": 88.88888888888889, "Quartile3MeansOfNumericAtts": 0.501438202247191, "MaxNominalAttDistinctValues": 9, "MinSkewnessOfNumericAtts": -1.736369526979418, "PercentageOfSymbolicFeatures": 11.11111111111111, "Quartile3MutualInformation": null, "MaxSkewnessOfNumericAtts": 14.865841322579492, "MinStdDevOfNumericAtts": 0.033482307045809054, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 6.523168593588908, "MaxStdDevOfNumericAtts": 0.14962619008985611, "MinorityClassPercentage": null,
"Quartile1KurtosisOfNumericAtts": 1.183963744278002, "Quartile3StdDevOfNumericAtts": 0.13591554701941125, "MeanAttributeEntropy": null, "MinorityClassSize": null, "Quartile1MeansOfNumericAtts": 0.26965168539325846, "StdvNominalAttDistinctValues": 0, "MeanKurtosisOfNumericAtts": 38.02680565623546, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": null, "MeanMeansOfNumericAtts": 0.3815674157303371, "Quartile1SkewnessOfNumericAtts": -0.0006473566941736786, "AutoCorrelation": null, "MeanMutualInformation": null, "Quartile1StdDevOfNumericAtts": 0.06507842208242565 }, "tags": [ { "tag": "derived", "uploader": "1" } ], "features": [ { "name": "f0", "index": "0", "type": "numeric", "distinct": "70", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "f1", "index": "1", "type": "numeric", "distinct": "66", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "f2", "index": "2", "type": "numeric", "distinct": "47", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "f3", "index": "3", "type": "numeric", "distinct": "63", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "f4", "index": "4", "type": "numeric", "distinct": "2", "missing": "0", "min": "1", "max": "1", "mean": "1", "stdev": "0" }, { "name": "f5", "index": "5", "type": "numeric", "distinct": "3", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "f6", "index": "6", "type": "numeric", "distinct": "35", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "f7", "index": "7", "type": "numeric", "distinct": "47", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "class", "index": "8", "type": "nominal", "distinct": "9", "missing": "0", "distr": [ [ "CYT", "NUC", "MIT", "ME3", "ME2", "ME1", "EXC", "VAC", "POX", "ERL" ], [ [ "138", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "123", "0", "0", "0", "0", "0", 
"0", "0", "0" ], [ "0", "0", "81", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "47", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "18", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "14", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "10", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "6", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "8", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "0" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }