{ "data_id": "43522", "name": "Multipurpose-World-News-Dataset", "exact_name": "Multipurpose-World-News-Dataset", "version": 1, "version_label": "v1.0", "description": "Content\nThis is a dataset I started building for my future personal projects, as I think this kind of data is quite hard to acquire for free and in short time. I started acquiring data on March 21st, 2020 and intend to keep doing that constantly.\nWhat you'll have inside this are news extracted from the following sources:\n\nFoxbusiness.com\nYoutube.com\nCnet.com\nThe Verge\nNytimes.com\nRawstory.com\nInvestors.com\nWreg.com\nReuters\nKoin.com\nInc.com\nCNBC, Nj.com\nWmtw.com\nNbcdfw.com\nBloomberg\nWowt.com\nBbc.com\n\nFor every 20-minute interval, a script checks for new headlines on these sources and add'em into a database. This CSV file is generated from that.\nI intend to update this dataset every day if I can (and if the machine I run this script is up).", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:33:20", "update_comment": null, "last_update": "2022-03-23 13:33:20", "licence": "GPL 2", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102347\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"id\"", "runs": 0, "suggest": { "input": [ "Multipurpose-World-News-Dataset", "Content This is a dataset I started building for my future personal projects, as I think this kind of data is quite hard to acquire for free and in short time. I started acquiring data on March 21st, 2020 and intend to keep doing that constantly. What you'll have inside this are news extracted from the following sources: Foxbusiness.com Youtube.com Cnet.com The Verge Nytimes.com Rawstory.com Investors.com Wreg.com Reuters Koin.com Inc.com CNBC, Nj.com Wmtw.com Nbcdfw.com Bloomberg Wowt.com Bbc.c " ], "weight": 5 }, "qualities": { "NumberOfInstances": 193279, "NumberOfFeatures": 4, "NumberOfClasses": null, "NumberOfMissingValues": 29954, "NumberOfInstancesWithMissingValues": 29954, "NumberOfNumericFeatures": 0, "NumberOfSymbolicFeatures": 0, "Dimensionality": 2.0695471313489827e-5, "PercentageOfNumericFeatures": 0, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 15.497803693106857, "AutoCorrelation": null, "PercentageOfMissingValues": 3.874450923276714 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "id", "index": "0", "type": "numeric", "distinct": "193279", "missing": "0", "ignore": "1", "min": "1", "max": "193279", "mean": "96640", "stdev": "55795" }, { "name": "timestamp", "index": "1", "type": "string", "distinct": "164190", "missing": "0" }, { "name": "source", "index": "2", "type": "string", "distinct": "20", "missing": "0" }, { "name": "title", "index": "3", "type": "string", "distinct": "193245", "missing": "11" }, { "name": "description", "index": "4", "type": "string", "distinct": "120986", "missing": "29943" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }