{ "data_id": "43345", "name": "Quora_Insincere_Questions_2018", "exact_name": "Quora_Insincere_Questions_2018", "version": 1, "version_label": "v1.0", "description": "Context\nIt's the preprocessed train data from Quora Insincere Questions competition 2018 The original train data is preprocessed to remove stop words, numbers, punctuations, common words and converted to lower case. The resultant data set is lemmatised and stemmed with scikit-learn\/NLTK library.\nContent\nIt contains approximately 1.3 million rows of quora questions with target =0 for sincere questions and target=1 for insincere questions.\nAcknowledgements\nThanks for Co-learning lounge mentors to help me to work on this problem\nInspiration\nIt's very handy to build the ML models in NLP.", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:21:16", "update_comment": null, "last_update": "2022-03-23 12:21:16", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102170\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Quora_Insincere_Questions_2018", "Context It's the preprocessed train data from Quora Insincere Questions competition 2018 The original train data is preprocessed to remove stop words, numbers, punctuations, common words and converted to lower case. The resultant data set is lemmatised and stemmed with scikit-learn\/NLTK library. Content It contains approximately 1.3 million rows of quora questions with target =0 for sincere questions and target=1 for insincere questions. Acknowledgements Thanks for Co-learning lounge mentors to " ], "weight": 5 }, "qualities": { "NumberOfInstances": 1306122, "NumberOfFeatures": 4, "NumberOfClasses": null, "NumberOfMissingValues": 1, "NumberOfInstancesWithMissingValues": 1, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 0, "Dimensionality": 3.062501052734737e-6, "PercentageOfNumericFeatures": 50, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 7.656252631836842e-5, "AutoCorrelation": null, "PercentageOfMissingValues": 1.9140631579592105e-5 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Statistics" } ], "features": [ { "name": "Unnamed:_0", "index": "0", "type": "numeric", "distinct": "1306122", "missing": "0", "min": "0", "max": "1306121", "mean": "653061", "stdev": "377045" }, { "name": "qid", "index": "1", "type": "string", "distinct": "1306122", "missing": "0" }, { "name": "question_text", "index": "2", "type": "string", "distinct": "1304660", "missing": "1" }, { "name": "target", "index": "3", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }