{ "data_id": "43370", "name": "Dutch-News-Articles", "exact_name": "Dutch-News-Articles", "version": 1, "version_label": "v1.0", "description": "Dutch News Articles\nThis dataset contains all the articles published by the NOS as of the 1st of January 2010. The data is obtained by scraping the NOS website. The NOS is one of the biggest (online) news organizations in the Netherlands.\nFeatures:\n\ndatetime: date and time of publication of the article.\ntitle: the title of the news article.\ncontent: the content of the news article.\ncategory: the category under which the NOS filed the article.\nurl: link to the original article.\n\nAbout the data\nThe title and content features are somewhat clean, meaning extra white spaces and newlines are removed. Furthermore, these features are normalized (NFKD). The NOS also publishes liveblogs. The posts in these liveblogs are not part of this dataset. \nExample\nI used this dataset in a recent blog post.", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:43:21", "update_comment": null, "last_update": "2022-03-23 12:43:21", "licence": "CC BY-NC-SA 4.0", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102195\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Dutch-News-Articles", "Dutch News Articles This dataset contains all the articles published by the NOS as of the 1st of January 2010. The data is obtained by scraping the NOS website. The NOS is one of the biggest (online) news organizations in the Netherlands. Features: datetime: date and time of publication of the article. title: the title of the news article. content: the content of the news article. category: the category under which the NOS filed the article. url: link to the original article. \nAbout the data The " ], "weight": 5 }, "qualities": { "NumberOfInstances": 237861, "NumberOfFeatures": 5, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 0, "NumberOfSymbolicFeatures": 0, "Dimensionality": 2.1020680145126775e-5, "PercentageOfNumericFeatures": 0, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Mathematics" } ], "features": [ { "name": "datetime", "index": "0", "type": "string", "distinct": "233814", "missing": "0" }, { "name": "title", "index": "1", "type": "string", "distinct": "235655", "missing": "0" }, { "name": "content", "index": "2", "type": "string", "distinct": "237528", "missing": "0" }, { "name": "category", "index": "3", "type": "string", "distinct": "9", "missing": "0" }, { "name": "url", "index": "4", "type": "string", "distinct": "237784", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }