{ "data_id": "597", "name": "fri_c2_500_5", "exact_name": "fri_c2_500_5", "version": 1, "version_label": null, "description": "**Author**: \n**Source**: Unknown - Date unknown \n**Please cite**: \n\nThe Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting\n\nThe dataset names are coded as \"fri_colinearitydegree_samplenumber_featurenumber\".\n\nFriedman is one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. The Friedman function is as follows: \n\ny=10*sin(pi*x1*x2)+20*(x3-0.5)^2+10*x4+5*x5+e\n\nIn the original Friedman function, there are 5 features for input. To measure the effects of non-related features, additional features are added to the datasets. These added features are independent from the output. However, to measure the algorithm's robustness to colinearity, the datasets are generated with 5 different colinearity degrees. The colinearity degree is the number of features depending on other features. \n\nThe generated Friedman dataset's parameters and values are given below: \nThe number of features: 5 10 25 50 100 (only the first 5 features are related to the output. 
The rest are completely random)\nThe number of samples: 100 250 500 1000\nColinearity degrees: 0 1 2 3 4\nFor the datasets with colinearity degree 4, the numbers of features are generated as 10, 25, 50 and 100.\nThe other datasets have 5, 10, 25 and 50 features.\n\nAs a result, 80 artificial datasets are generated by (4 different feature numbers * 4 different sample numbers * 5 different colinearity degrees)\n\nThe last attribute in each file is the target.", "format": "ARFF", "uploader": "Joaquin Vanschoren", "uploader_id": 2, "visibility": "public", "creator": null, "contributor": null, "date": "2014-10-04 00:55:25", "update_comment": "fixed arff file (whitespace delimiters)", "last_update": "2015-04-15 22:40:52", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/1390102\/phpFrlhBk", "kaggle_url": null, "default_target_attribute": "oz6", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "fri_c2_500_5", "The Friedman datasets are 80 artificially generated datasets originating from: J.H. Friedman (1999). Stochastic Gradient Boosting The dataset names are coded as \"fri_colinearitydegree_samplenumber_featurenumber\". Friedman is one of the most used functions for data generation (Friedman, 1999). Friedman functions include both linear and non-linear relations between input and output, and a normalized noise (e) is added to the output. 
The Friedman function is as follows: y=10*sin(pi*x1*x2)+20*( " ], "weight": 5 }, "qualities": { "NumberOfInstances": 500, "NumberOfFeatures": 6, "NumberOfClasses": 0, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 6, "NumberOfSymbolicFeatures": 0, "MaxMutualInformation": null, "MinNominalAttDistinctValues": null, "PercentageOfBinaryFeatures": 0, "Quartile2StdDevOfNumericAtts": 1.0000000008545986, "RandomTreeDepth1AUC": null, "Dimensionality": 0.012, "MaxNominalAttDistinctValues": null, "MinSkewnessOfNumericAtts": -0.5784813454207074, "PercentageOfInstancesWithMissingValues": 0, "Quartile3AttributeEntropy": null, "RandomTreeDepth1ErrRate": null, "EquivalentNumberOfAtts": null, "MaxSkewnessOfNumericAtts": 0.6633755602127516, "MinStdDevOfNumericAtts": 0.9999999987229053, "PercentageOfMissingValues": 0, "Quartile3KurtosisOfNumericAtts": -0.5134491414732215, "AutoCorrelation": -0.11071649179358631, "RandomTreeDepth1Kappa": null, "J48.00001.AUC": null, "MaxStdDevOfNumericAtts": 1.0000000026603857, "MinorityClassPercentage": null, "PercentageOfNumericFeatures": 100, "Quartile3MeansOfNumericAtts": 5.149498483181246e-11, "CfsSubsetEval_DecisionStumpAUC": null, "RandomTreeDepth2AUC": null, "J48.00001.ErrRate": null, "MeanAttributeEntropy": null, "MinorityClassSize": null, "PercentageOfSymbolicFeatures": 0, "Quartile3MutualInformation": null, "CfsSubsetEval_DecisionStumpErrRate": null, "RandomTreeDepth2ErrRate": null, "J48.00001.Kappa": null, "MeanKurtosisOfNumericAtts": -0.8786627898178792, "NaiveBayesAUC": null, "Quartile1AttributeEntropy": null, "Quartile3SkewnessOfNumericAtts": 0.20976048215815307, "CfsSubsetEval_DecisionStumpKappa": null, "RandomTreeDepth2Kappa": null, "J48.0001.AUC": null, "MeanMeansOfNumericAtts": -4.0260001856381084e-10, "NaiveBayesErrRate": null, "Quartile1KurtosisOfNumericAtts": -1.243448253276247, "Quartile3StdDevOfNumericAtts": 1.000000002119385, "CfsSubsetEval_NaiveBayesAUC": null, 
"RandomTreeDepth3AUC": null, "J48.0001.ErrRate": null, "MeanMutualInformation": null, "NaiveBayesKappa": null, "Quartile1MeansOfNumericAtts": -8.957500302886334e-10, "REPTreeDepth1AUC": null, "CfsSubsetEval_NaiveBayesErrRate": null, "RandomTreeDepth3ErrRate": null, "J48.0001.Kappa": null, "MeanNoiseToSignalRatio": null, "NumberOfBinaryFeatures": 0, "Quartile1MutualInformation": null, "REPTreeDepth1ErrRate": null, "CfsSubsetEval_NaiveBayesKappa": null, "RandomTreeDepth3Kappa": null, "J48.001.AUC": null, "MeanNominalAttDistinctValues": null, "Quartile1SkewnessOfNumericAtts": -0.18492045547836322, "REPTreeDepth1Kappa": null, "CfsSubsetEval_kNN1NAUC": null, "StdvNominalAttDistinctValues": null, "J48.001.ErrRate": null, "J48.001.Kappa": null, "MeanSkewnessOfNumericAtts": 0.015318383004246889, "Quartile1StdDevOfNumericAtts": 0.9999999995035588, "REPTreeDepth2AUC": null, "CfsSubsetEval_kNN1NErrRate": null, "kNN1NAUC": null, "MajorityClassPercentage": null, "MeanStdDevOfNumericAtts": 1.0000000007992194, "Quartile2AttributeEntropy": null, "REPTreeDepth2ErrRate": null, "CfsSubsetEval_kNN1NKappa": null, "kNN1NErrRate": null, "MajorityClassSize": null, "MinAttributeEntropy": null, "Quartile2KurtosisOfNumericAtts": -1.0071977452296426, "REPTreeDepth2Kappa": null, "ClassEntropy": null, "kNN1NKappa": null, "MaxAttributeEntropy": null, "MinKurtosisOfNumericAtts": -1.262535424944865, "Quartile2MeansOfNumericAtts": -1.4789000613024683e-10, "REPTreeDepth3AUC": null, "DecisionStumpAUC": null, "MaxKurtosisOfNumericAtts": -0.11004165822818335, "MinMeansOfNumericAtts": -1.8442000405682889e-9, "Quartile2MutualInformation": null, "REPTreeDepth3ErrRate": null, "DecisionStumpErrRate": null, "MaxMeansOfNumericAtts": 3.529799827983737e-10, "MinMutualInformation": null, "Quartile2SkewnessOfNumericAtts": 0.0010970596288661025, "REPTreeDepth3Kappa": null, "DecisionStumpKappa": null }, "tags": [ { "uploader": "24659", "tag": "artificial" }, { "uploader": "38960", "tag": "Data Science" }, { 
"uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Mathematics" }, { "uploader": "38960", "tag": "Statistics" } ], "topics": [ { "topic": "Artificial datasets", "uploader": "8111" } ], "features": [ { "name": "oz6", "index": "5", "type": "numeric", "distinct": "500", "missing": "0", "target": "1", "min": "-3", "max": "2", "mean": "0", "stdev": "1" }, { "name": "oz1", "index": "0", "type": "numeric", "distinct": "500", "missing": "0", "min": "-2", "max": "2", "mean": "0", "stdev": "1" }, { "name": "oz2", "index": "1", "type": "numeric", "distinct": "500", "missing": "0", "min": "-2", "max": "2", "mean": "0", "stdev": "1" }, { "name": "oz3", "index": "2", "type": "numeric", "distinct": "500", "missing": "0", "min": "-2", "max": "3", "mean": "0", "stdev": "1" }, { "name": "oz4", "index": "3", "type": "numeric", "distinct": "500", "missing": "0", "min": "-2", "max": "2", "mean": "0", "stdev": "1" }, { "name": "oz5", "index": "4", "type": "numeric", "distinct": "500", "missing": "0", "min": "-2", "max": "2", "mean": "0", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }