{ "data_id": "44962", "name": "forest_fires", "exact_name": "forest_fires", "version": 9, "version_label": null, "description": "**Data Description**\n\nThe aim of this dataset is to predict the burned area of forest fires, in the northeast region of Portugal, by using meteorological and other data.\n\nThe output 'area' was first transformed with a $ln(x+1)$ function. Then, several Data Mining methods were applied. After fitting the models, the outputs were post-processed with the inverse of the $ln(x+1)$ transform. Four different input setups were used.\n\n**Attribute Description**\n\n1. *X* - x-axis spatial coordinate within the Montesinho park map: 1 to 9\n2. *Y* - y-axis spatial coordinate within the Montesinho park map: 2 to 9\n3. *month* - month of the year: 'jan' to 'dec'\n4. *day* - day of the week: 'mon' to 'sun'\n5. *FFMC* - FFMC index from the FWI system: 18.7 to 96.20\n6. *DMC* - DMC index from the FWI system: 1.1 to 291.3\n7. *DC* - DC index from the FWI system: 7.9 to 860.6\n8. *ISI* - ISI index from the FWI system: 0.0 to 56.10\n9. *temp* - temperature in Celsius degrees: 2.2 to 33.30\n10. *RH* - relative humidity in %: 15.0 to 100\n11. *wind* - wind speed in km\/h: 0.40 to 9.40\n12. *rain* - outside rain in mm\/m2 : 0.0 to 6.4\n13. *area* - the burned area of the forest (in ha): 0.00 to 1090.84 (this target variable is very skewed towards 0.0, thus it may make sense to model with the logarithm transform).", "format": "arff", "uploader": "Sebastian Fischer", "uploader_id": 30127, "visibility": "public", "creator": null, "contributor": null, "date": "2022-12-22 16:11:46", "update_comment": null, "last_update": "2022-12-22 16:11:46", "licence": "CC BY 4.0", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22111826\/file22f166f718672.arff", "default_target_attribute": "area", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "forest_fires", "The aim of this dataset is to predict the burned area of forest fires, in the northeast region of Portugal, by using meteorological and other data. The output 'area' was first transformed with a $ln(x+1)$ function. Then, several Data Mining methods were applied. After fitting the models, the outputs were post-processed with the inverse of the $ln(x+1)$ transform. Four different input setups were used. 1. *X* - x-axis spatial coordinate within the Montesinho park map: 1 to 9 2. *Y* - y-axis spati " ], "weight": 5 }, "qualities": { "NumberOfInstances": 517, "NumberOfFeatures": 13, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 11, "NumberOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": -13.814806201550388, "PercentageOfMissingValues": 0, "Dimensionality": 0.025145067698259187, "PercentageOfNumericFeatures": 84.61538461538461, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0 }, "tags": [ { "tag": "Demographics", "uploader": "38960" }, { "tag": "Health", "uploader": "38960" }, { "tag": "study_353", "uploader": "0" } ], "features": [ { "name": "area", "index": "12", "type": "numeric", "distinct": "251", "missing": "0", "target": "1", "min": "0", "max": "1091", "mean": "13", "stdev": "64" }, { "name": "X", "index": "0", "type": "numeric", "distinct": "9", "missing": "0", "min": "1", "max": "9", "mean": "5", "stdev": "2" }, { "name": "Y", "index": "1", "type": "numeric", "distinct": "7", "missing": "0", "min": "2", "max": "9", "mean": "4", "stdev": "1" }, { "name": "month", "index": "2", "type": "string", "distinct": "12", "missing": "0" }, { "name": "day", "index": "3", "type": "string", "distinct": "7", "missing": "0" }, { "name": "FFMC", "index": "4", "type": "numeric", "distinct": "106", "missing": "0", "min": "19", "max": "96", "mean": "91", "stdev": "6" }, { "name": "DMC", "index": "5", "type": "numeric", "distinct": "215", "missing": "0", "min": "1", "max": "291", "mean": "111", "stdev": "64" }, { "name": "DC", "index": "6", "type": "numeric", "distinct": "219", "missing": "0", "min": "8", "max": "861", "mean": "548", "stdev": "248" }, { "name": "ISI", "index": "7", "type": "numeric", "distinct": "119", "missing": "0", "min": "0", "max": "56", "mean": "9", "stdev": "5" }, { "name": "temp", "index": "8", "type": "numeric", "distinct": "192", "missing": "0", "min": "2", "max": "33", "mean": "19", "stdev": "6" }, { "name": "RH", "index": "9", "type": "numeric", "distinct": "75", "missing": "0", "min": "15", "max": "100", "mean": "44", "stdev": "16" }, { "name": "wind", "index": "10", "type": "numeric", "distinct": "21", "missing": "0", "min": "0", "max": "9", "mean": "4", "stdev": "2" }, { "name": "rain", "index": "11", "type": "numeric", "distinct": "7", "missing": "0", "min": "0", "max": "6", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }