{ "data_id": "43394", "name": "Source-based-Fake-News-Classification", "exact_name": "Source-based-Fake-News-Classification", "version": 1, "version_label": "v1.0", "description": "Context\nSocial media is a vast pool of content, and among all the content available for users to access, news is an element that is accessed most frequently. These news can be posted by politicians, news channels, newspaper websites, or even common civilians. These posts have to be checked for their authenticity, since spreading misinformation has been a real concern in todays times, and many firms are taking steps to make the common people aware of the consequences of spread misinformation. The measure of authenticity of the news posted online cannot be definitively measured, since the manual classification of news is tedious and time-consuming, and is also subject to bias.\nPublished paper: http:\/\/www.ijirset.com\/upload\/2020\/june\/115_4_Source.PDF\nContent\nData preprocessing has been done on the dataset Getting Real about Fake News and skew has been eliminated.\nInspiration\nIn an era where fake WhatsApp forwards and Tweets are capable of influencing naive minds, tools and knowledge have to be put to practical use in not only mitigating the spread of misinformation but also to inform people about the type of news they consume. \nDevelopment of practical applications for users to gain insight from the articles they consume, fact-checking websites, built-in plugins and article parsers can\nfurther be refined, made easier to access, and more importantly, should create more awareness.\nAcknowledgements\nGetting Real about Fake News seemed the most promising for preprocessing, feature extraction, and model classification. \nThe reason is due to the fact that all the other datasets lacked the sources from where the article\/statement text was produced and published from. Citing the sources for article text is crucial to check the trustworthiness of the news and further helps in labelling the data as fake or untrustworthy.\nThanks to the datasets comprehensiveness in terms of citing the source information of the text along with author names, date of publication and labels.", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:57:25", "update_comment": null, "last_update": "2022-03-23 12:57:25", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102219\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Source-based-Fake-News-Classification", "Context Social media is a vast pool of content, and among all the content available for users to access, news is an element that is accessed most frequently. These news can be posted by politicians, news channels, newspaper websites, or even common civilians. These posts have to be checked for their authenticity, since spreading misinformation has been a real concern in todays times, and many firms are taking steps to make the common people aware of the consequences of spread misinformation. The " ], "weight": 5 }, "qualities": { "NumberOfInstances": 2096, "NumberOfFeatures": 12, "NumberOfClasses": null, "NumberOfMissingValues": 104, "NumberOfInstancesWithMissingValues": 51, "NumberOfNumericFeatures": 1, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0057251908396946565, "PercentageOfNumericFeatures": 8.333333333333332, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 2.433206106870229, "AutoCorrelation": null, "PercentageOfMissingValues": 0.4134860050890585 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "author", "index": "0", "type": "string", "distinct": "491", "missing": "0" }, { "name": "published", "index": "1", "type": "string", "distinct": "2006", "missing": "0" }, { "name": "title", "index": "2", "type": "string", "distinct": "1784", "missing": "0" }, { "name": "text", "index": "3", "type": "string", "distinct": "1941", "missing": "46" }, { "name": "language", "index": "4", "type": "string", "distinct": "5", "missing": "1" }, { "name": "site_url", "index": "5", "type": "string", "distinct": "68", "missing": "1" }, { "name": "main_img_url", "index": "6", "type": "string", "distinct": "1229", "missing": "1" }, { "name": "type", "index": "7", "type": "string", "distinct": "8", "missing": "1" }, { "name": "label", "index": "8", "type": "string", "distinct": "2", "missing": "1" }, { "name": "title_without_stopwords", "index": "9", "type": "string", "distinct": "1780", "missing": "2" }, { "name": "text_without_stopwords", "index": "10", "type": "string", "distinct": "1937", "missing": "50" }, { "name": "hasImage", "index": "11", "type": "numeric", "distinct": "2", "missing": "1", "min": "0", "max": "1", "mean": "1", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }