{ "data_id": "43606", "name": "California-Environmental-Conditions-Dataset", "exact_name": "California-Environmental-Conditions-Dataset", "version": 1, "version_label": "v1.0", "description": "Context\nExplore an environmental conditions dataframe scraped from CIMIS weather stations using a selenium chromedriver. With California's wildfires setting records in 2020, it is worthwhile to explore factors that may contribute to creating at risk environments. This dataset was used in conjunction to building an XGBoost Classifier to accurately predict probability for fire given environmental condition features. \nFollowing my Fire Risk Analysis project.\nContent\n262 Station Id's correspond to California weather station IDs. Approximately 14 numerical features for exploratory data analysis. Advanced users can keep date feature for time series analysis. Target column corresponds to fires on the respective observation date, in the observation region.\nAcknowledgements\n\nCIMIS: https:\/\/cimis.water.ca.gov\/Default.aspx\n\nInspiration\nWhat additional features would be valuable in determining fire risk?\nWhat features are most important for specific models in determining target?\nIs there an accurate LSTM to determine feature predictions?\n\" to determine fire risk in the future?", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 00:39:20", "update_comment": null, "last_update": "2022-03-24 00:39:20", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102431\/dataset", "default_target_attribute": "Target", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "California-Environmental-Conditions-Dataset", "Context Explore an environmental conditions dataframe scraped from CIMIS weather stations using a selenium chromedriver. With California's wildfires setting records in 2020, it is worthwhile to explore factors that may contribute to creating at risk environments. This dataset was used in conjunction to building an XGBoost Classifier to accurately predict probability for fire given environmental condition features. Following my Fire Risk Analysis project. Content 262 Station Id's correspond to Ca " ], "weight": 5 }, "qualities": { "NumberOfInstances": 128125, "NumberOfFeatures": 19, "NumberOfClasses": 0, "NumberOfMissingValues": 138, "NumberOfInstancesWithMissingValues": 116, "NumberOfNumericFeatures": 16, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.00014829268292682927, "PercentageOfNumericFeatures": 84.21052631578947, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0.09053658536585366, "AutoCorrelation": 0.9957619181418002, "PercentageOfMissingValues": 0.005668806161745828 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "Target", "index": "18", "type": "numeric", "distinct": "2", "missing": "0", "target": "1", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "Stn_Id", "index": "0", "type": "numeric", "distinct": "153", "missing": "0", "min": "2", "max": "262", "mean": "157", "stdev": "73" }, { "name": "Stn_Name", "index": "1", "type": "string", "distinct": "153", "missing": "0" }, { "name": "CIMIS_Region", "index": "2", "type": "string", "distinct": "14", "missing": "0" }, { "name": "Date", "index": "3", "type": "string", "distinct": "991", "missing": "0" }, { "name": "ETo_(in)", "index": "4", "type": "numeric", "distinct": "50", "missing": "83", "min": "0", "max": "0", "mean": "0", "stdev": "0" }, { "name": "Precip_(in)", "index": "5", "type": "numeric", "distinct": "312", "missing": "0", "min": "0", "max": "14", "mean": "0", "stdev": "0" }, { "name": "Sol_Rad_(Ly\/day)", "index": "6", "type": "numeric", "distinct": "976", "missing": "0", "min": "0", "max": "6618", "mean": "459", "stdev": "198" }, { "name": "Avg_Vap_Pres_(mBars)", "index": "7", "type": "numeric", "distinct": "333", "missing": "0", "min": "0", "max": "40", "mean": "11", "stdev": "4" }, { "name": "Max_Air_Temp_(F)", "index": "8", "type": "numeric", "distinct": "922", "missing": "3", "min": "25", "max": "124", "mean": "75", "stdev": "15" }, { "name": "Min_Air_Temp_(F)", "index": "9", "type": "numeric", "distinct": "884", "missing": "1", "min": "-5", "max": "93", "mean": "48", "stdev": "11" }, { "name": "Avg_Air_Temp_(F)", "index": "10", "type": "numeric", "distinct": "850", "missing": "5", "min": "13", "max": "107", "mean": "61", "stdev": "12" }, { "name": "Max_Rel_Hum_(%)", "index": "11", "type": "numeric", "distinct": "101", "missing": "0", "min": "0", "max": "100", "mean": "85", "stdev": "16" }, { "name": "Min_Rel_Hum_(%)", "index": "12", "type": "numeric", "distinct": "101", "missing": "0", "min": "0", "max": "100", "mean": "40", "stdev": "21" }, { "name": "Avg_Rel_Hum_(%)", "index": "13", "type": "numeric", "distinct": "101", "missing": "13", "min": "0", "max": "100", "mean": "61", "stdev": "20" }, { "name": "Dew_Point_(F)", "index": "14", "type": "numeric", "distinct": "857", "missing": "13", "min": "-74", "max": "82", "mean": "46", "stdev": "11" }, { "name": "Avg_Wind_Speed_(mph)", "index": "15", "type": "numeric", "distinct": "195", "missing": "0", "min": "1", "max": "47", "mean": "4", "stdev": "2" }, { "name": "Wind_Run_(miles)", "index": "16", "type": "numeric", "distinct": "3308", "missing": "0", "min": "16", "max": "1125", "mean": "104", "stdev": "49" }, { "name": "Avg_Soil_Temp_(F)", "index": "17", "type": "numeric", "distinct": "648", "missing": "20", "min": "32", "max": "97", "mean": "63", "stdev": "11" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }