{ "data_id": "46085", "name": "Water_Quality", "exact_name": "Water_Quality", "version": 1, "version_label": null, "description": "Description:\nThe dataset named \"water-quality-1.csv\" comprises a comprehensive collection of water quality measurements from various sites, meticulously recorded to monitor environmental health and pollution levels. It encompasses a diverse range of parameters such as Fecal Coliform, Conductivity Field, Temperature, Total Nitrogen, and Nitrite + Nitrate Nitrogen, crucial for assessing water quality in ecosystems. The data spans multiple years, offering insights into temporal changes affecting water bodies.\n\nAttribute Description:\n- Sample ID: Unique identifier for each sample (e.g., 58086).\n- Grab ID: Identifier for the specific collection instance, with some entries missing.\n- Profile ID: Unique profile number associated with each sample site (e.g., 46937).\n- Sample Number: A distinct code for each sample, combining letters and numbers (e.g., 'L47270-122').\n- Collect DateTime: Date and time when the sample was collected, in MM\/DD\/YYYY HH:MM:SS AM\/PM format.\n- Depth (m): Depth at which the sample was collected, in meters (e.g., 1.0).\n- Site Type: Classification of the water body from which the sample was taken (e.g., Large Lakes).\n- Area: Geographic location or name of the water body (e.g., Central Puget Sound).\n- Locator: A unique code for the site's location (e.g., KTHA03).\n- Site: Detailed description of the sample location (e.g., Lake Sammamish near Issaquah Creek).\n- Parameter: The water quality parameter measured (e.g., Fecal Coliform).\n- Value: The measured value for the parameter, with some missing entries.\n- Units: Measurement units for the parameter values (e.g., umhos\/cm).\n- QualityId: A numerical value indicating the quality of the data (e.g., 2).\n- Lab Qualifier, MDL, RDL, Text Value, Sample Info, Steward Note, Replicates, Replicate Of, Method, Date Analyzed, Data Source: These fields contain additional information about the laboratory procedures, data quality, analysis methods, and sources.\n\nUse Case:\nThis dataset is invaluable for researchers and environmentalists looking to study water quality trends, identify pollution hotspots, and evaluate the effectiveness of environmental policies over time. It can aid in comparative analysis across different water bodies and help in the formulation of strategies for water conservation and pollution control. Moreover, policymakers can utilize this data to enforce environmental regulations and initiate cleanup efforts in degraded aquatic ecosystems.", "format": "arff", "uploader": "Iwo Godzwon", "uploader_id": 39999, "visibility": "public", "creator": "\"None\"", "contributor": "\"Sukhmandeep Singh Brar\"", "date": "2024-05-31 14:11:55", "update_comment": null, "last_update": "2024-05-31 14:11:55", "licence": "Public Domain (CC0)", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22120529\/dataset", "kaggle_url": null, "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Water_Quality", "Description: The dataset named \"water-quality-1.csv\" comprises a comprehensive collection of water quality measurements from various sites, meticulously recorded to monitor environmental health and pollution levels. It encompasses a diverse range of parameters such as Fecal Coliform, Conductivity Field, Temperature, Total Nitrogen, and Nitrite + Nitrate Nitrogen, crucial for assessing water quality in ecosystems. The data spans multiple years, offering insights into temporal changes affecting wa " ], "weight": 5 }, "qualities": { "NumberOfInstances": 1259444, "NumberOfFeatures": 25, "NumberOfClasses": null, "NumberOfMissingValues": 10222268, "NumberOfInstancesWithMissingValues": 1259444, "NumberOfNumericFeatures": 10, "NumberOfSymbolicFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 100, "PercentageOfMissingValues": 32.4659706981811, "AutoCorrelation": null, "PercentageOfNumericFeatures": 40, "Dimensionality": 1.9850029060442544e-5, "PercentageOfSymbolicFeatures": 0, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0 }, "tags": [], "features": [ { "name": "Sample ID", "index": "0", "type": "numeric", "distinct": "154694", "missing": "0", "min": "531", "max": "186033", "mean": "80721", "stdev": "51682" }, { "name": "Grab ID", "index": "1", "type": "numeric", "distinct": "112985", "missing": "376778", "min": "700", "max": "186033", "mean": "88474", "stdev": "46350" }, { "name": "Profile ID", "index": "2", "type": "numeric", "distinct": "54951", "missing": "0", "min": "4", "max": "79119", "mean": "42654", "stdev": "16600" }, { "name": "Sample Number", "index": "3", "type": "string", "distinct": "154694", "missing": "0" }, { "name": "Collect DateTime", "index": "4", "type": "string", "distinct": "102284", "missing": "0" }, { "name": "Depth (m)", "index": "5", "type": "numeric", "distinct": "646", "missing": "376778", "min": "0", "max": "201", "mean": "11", "stdev": "14" }, { "name": "Site Type", "index": "6", "type": "string", "distinct": "6", "missing": "0" }, { "name": "Area", "index": "7", "type": "string", "distinct": "67", "missing": "133" }, { "name": "Locator", "index": "8", "type": "string", "distinct": "180", "missing": "0" }, { "name": "Site", "index": "9", "type": "string", "distinct": "178", "missing": "0" }, { "name": "Parameter", "index": "10", "type": "string", "distinct": "47", "missing": "0" }, { "name": "Value", "index": "11", "type": "numeric", "distinct": "6012", "missing": "109085", "min": "-2", "max": "1000000", "mean": "153", "stdev": "6204" }, { "name": "Units", "index": "12", "type": "string", "distinct": "23", "missing": "780" }, { "name": "QualityId", "index": "13", "type": "numeric", "distinct": "8", "missing": "0", "min": "0", "max": "9", "mean": "2", "stdev": "1" }, { "name": "Lab Qualifier", "index": "14", "type": "string", "distinct": "51", "missing": "1110071" }, { "name": "MDL", "index": "15", "type": "numeric", "distinct": "165", "missing": "651711", "min": "0", "max": "100", "mean": "0", "stdev": "0" }, { "name": "RDL", "index": "16", "type": "numeric", "distinct": "471", "missing": "653298", "min": "0", "max": "60", "mean": "2", "stdev": "3" }, { "name": "Text Value", "index": "17", "type": "string", "distinct": "24856", "missing": "1030752" }, { "name": "Sample Info", "index": "18", "type": "string", "distinct": "353", "missing": "1256301" }, { "name": "Steward Note", "index": "19", "type": "string", "distinct": "64", "missing": "1258764" }, { "name": "Replicates", "index": "20", "type": "numeric", "distinct": "202", "missing": "1257803", "min": "2824", "max": "185802", "mean": "105815", "stdev": "55903" }, { "name": "Replicate Of", "index": "21", "type": "numeric", "distinct": "202", "missing": "1257913", "min": "2153", "max": "185803", "mean": "112754", "stdev": "50283" }, { "name": "Method", "index": "22", "type": "string", "distinct": "202", "missing": "190439" }, { "name": "Date Analyzed", "index": "23", "type": "string", "distinct": "4610", "missing": "691662" }, { "name": "Data Source", "index": "24", "type": "string", "distinct": "1", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }