{ "data_id": "1090", "name": "MercuryinBass", "exact_name": "MercuryinBass", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nDatasets of Data And Story Library, project illustrating use of basic statistic methods, converted to arff format by Hakan Kjellerstrand.\nSource: TunedIT: http:\/\/tunedit.org\/repo\/DASL\n\nDASL file http:\/\/lib.stat.cmu.edu\/DASL\/Datafiles\/MercuryinBass.html\n\nMercury Contamination in Bass\n\nReference: Lange, Royals, & Connor. (1993). Transactions of the American Fisheries Society .\nAuthorization: contact authors\nDescription: Largemouth bass were studied in 53 different Florida lakes to examine the factors that influence the level of mercury contamination. Water samples were collected from the surface of the middle of each lake in August 1990 and then again in March 1991. The pH level, the amount of chlorophyll, calcium, and alkalinity were measured in each sample. The average of the August and March values were used in the analysis. Next, a sample of fish was taken from each lake with sample sizes ranging from 4 to 44 fish. The age of each fish and mercury concentration in the muscle tissue was measured. (Note: Since fish absorb mercury over time, older fish will tend to have higher concentrations). Thus, to make a fair comparison of the fish in different lakes, the investigators used a regression estimate of the expected mercury concentration in a three year old fish as the standardized value for each lake. 
Finally, in 10 of the 53 lakes, the age of the individual fish could not be determined and the average mercury concentration of the sampled fish was used instead of the standardized value.\nNumber of cases: 53\nVariable Names:\n\nID: ID number\nLake: Name of the lake\nAlkalinity: Alkalinity (mg\/L as Calcium Carbonate)\npH: pH\nCalcium: Calcium (mg\/l)\nChlorophyll: Chlorophyll (mg\/l)\nAvg_Mercury: Average mercury concentration (parts per million) in the muscle tissue of the fish sampled from that lake\nNo.samples: How many fish were sampled from the lake\nmin: Minimum mercury concentration amongst the sampled fish\nmax: Maximum mercury concentration amongst the sampled fish\n3_yr_Standard_Mercury: Regression estimate of the mercury concentration in a 3 year old fish from the lake (or = Avg Mercury when age data was not available)\nage_data: Indicator of the availability of age data on fish sampled", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-10-07 00:03:49", "update_comment": "set targets, ignores", "last_update": "2014-10-07 01:02:29", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/53973\/MercuryinBass.arff", "kaggle_url": null, "default_target_attribute": "3_yr_Standard_Mercury", "row_id_attribute": null, "ignore_attribute": "\"ID\",\"Lake\"", "runs": 2, "suggest": { "input": [ "MercuryinBass", "Datasets of Data And Story Library, project illustrating use of basic statistic methods, converted to arff format by Hakan Kjellerstrand. Source: TunedIT: http:\/\/tunedit.org\/repo\/DASL DASL file http:\/\/lib.stat.cmu.edu\/DASL\/Datafiles\/MercuryinBass.html Mercury Contamination in Bass Reference: Lange, Royals, & Connor. (1993). Transactions of the American Fisheries Society . 
Authorization: contact authors Description: Largemouth bass were studied in 53 different Florida lakes to examine the factors " ], "weight": 5 }, "qualities": { "NumberOfInstances": 53, "NumberOfFeatures": 12, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 11, "NumberOfSymbolicFeatures": 1, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": null, "RandomTreeDepth2ErrRate": null, "J48.00001.Kappa": null, "MeanAttributeEntropy": null, "MinorityClassSize": null, "PercentageOfSymbolicFeatures": 8.333333333333332, "Quartile3SkewnessOfNumericAtts": 1.6753421914429074, "CfsSubsetEval_DecisionStumpKappa": null, "RandomTreeDepth2Kappa": null, "J48.0001.AUC": null, "MeanKurtosisOfNumericAtts": 1.590313257702028, "NaiveBayesAUC": null, "Quartile1AttributeEntropy": null, "Quartile3StdDevOfNumericAtts": 26.40351115303093, "CfsSubsetEval_NaiveBayesAUC": null, "RandomTreeDepth3AUC": null, "J48.0001.ErrRate": null, "MeanMeansOfNumericAtts": 10.55022641509434, "NaiveBayesErrRate": null, "Quartile1KurtosisOfNumericAtts": -0.4652283738954137, "REPTreeDepth1AUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "RandomTreeDepth3ErrRate": null, "J48.0001.Kappa": null, "MeanMutualInformation": null, "NaiveBayesKappa": null, "Quartile1MeansOfNumericAtts": 0.5236792452830188, "REPTreeDepth1ErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "RandomTreeDepth3Kappa": null, "J48.001.AUC": null, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": null, "REPTreeDepth1Kappa": null, "CfsSubsetEval_kNN1NAUC": null, "StdvNominalAttDistinctValues": null, "J48.001.ErrRate": null, "MeanNominalAttDistinctValues": null, "Quartile1SkewnessOfNumericAtts": 0.3039641661301623, "REPTreeDepth2AUC": null, "CfsSubsetEval_kNN1NErrRate": null, "kNN1NAUC": null, "J48.001.Kappa": null, "MeanSkewnessOfNumericAtts": 0.9061586163880923, "Quartile1StdDevOfNumericAtts": 
0.3404590629662856, "REPTreeDepth2ErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "kNN1NErrRate": null, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 10.562476460308833, "Quartile2AttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": 0.7119799816941943, "REPTreeDepth2Kappa": null, "ClassEntropy": null, "kNN1NKappa": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2MeansOfNumericAtts": 3.732547169811321, "REPTreeDepth3AUC": null, "DecisionStumpAUC": null, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -0.5149449394161025, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": null, "DecisionStumpErrRate": null, "MaxKurtosisOfNumericAtts": 7.233472746032271, "MinMeansOfNumericAtts": 0.279811320754717, "Quartile2SkewnessOfNumericAtts": 1.013078876297577, "REPTreeDepth3Kappa": null, "DecisionStumpKappa": null, "MaxMeansOfNumericAtts": 37.530188679245285, "MinMutualInformation": null, "Quartile2StdDevOfNumericAtts": 0.9052480902435533, "RandomTreeDepth1AUC": null, "Dimensionality": 0.22641509433962265, "MaxMutualInformation": null, "MinNominalAttDistinctValues": null, "PercentageOfBinaryFeatures": 0, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": null, "EquivalentNumberOfAtts": null, "MaxNominalAttDistinctValues": null, "MinSkewnessOfNumericAtts": -1.638132937340291, "PercentageOfInstancesWithMissingValues": 0, "Quartile3KurtosisOfNumericAtts": 2.4950524422291984, "AutoCorrelation": 0.6684615384615386, "RandomTreeDepth1Kappa": null, "J48.00001.AUC": null, "MaxSkewnessOfNumericAtts": 2.7336667530207026, "MinStdDevOfNumericAtts": 0.22640578415788135, "PercentageOfMissingValues": 0, "Quartile3MeansOfNumericAtts": 22.430660377358492, "CfsSubsetEval_DecisionStumpAUC": null, "RandomTreeDepth2AUC": null, "J48.00001.ErrRate": null, "MaxStdDevOfNumericAtts": 38.20352674469919, "MinorityClassPercentage": null, "PercentageOfNumericFeatures": 91.66666666666666 }, "tags": [], "features": [ { "name": 
"3_yr_Standard_Mercury", "index": "10", "type": "numeric", "distinct": "38", "missing": "0", "target": "1", "min": "0", "max": "2", "mean": "1", "stdev": "0" }, { "name": "ID", "index": "0", "type": "numeric", "distinct": "53", "missing": "0", "ignore": "1", "min": "1", "max": "53", "mean": "27", "stdev": "15" }, { "name": "Lake", "index": "1", "type": "nominal", "distinct": "53", "missing": "0", "ignore": "1", "distr": [] }, { "name": "Alkalinity", "index": "2", "type": "numeric", "distinct": "51", "missing": "0", "min": "1", "max": "128", "mean": "38", "stdev": "38" }, { "name": "pH", "index": "3", "type": "numeric", "distinct": "34", "missing": "0", "min": "4", "max": "9", "mean": "7", "stdev": "1" }, { "name": "Calcium", "index": "4", "type": "numeric", "distinct": "48", "missing": "0", "min": "1", "max": "91", "mean": "22", "stdev": "25" }, { "name": "Chlorophyll", "index": "5", "type": "numeric", "distinct": "43", "missing": "0", "min": "1", "max": "152", "mean": "23", "stdev": "31" }, { "name": "Avg_Mercury", "index": "6", "type": "numeric", "distinct": "41", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" }, { "name": "No.samples", "index": "7", "type": "numeric", "distinct": "15", "missing": "0", "min": "4", "max": "44", "mean": "13", "stdev": "9" }, { "name": "min", "index": "8", "type": "numeric", "distinct": "32", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "max", "index": "9", "type": "numeric", "distinct": "44", "missing": "0", "min": "0", "max": "2", "mean": "1", "stdev": "1" }, { "name": "age_data", "index": "11", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }