{
    "data_id": "43522",
    "name": "Multipurpose-World-News-Dataset",
    "exact_name": "Multipurpose-World-News-Dataset",
    "version": 1,
    "version_label": "v1.0",
    "description": "Content\nThis is a dataset I started building for my future personal projects, as I think this kind of data is quite hard to acquire for free and in short time. I started acquiring data on March 21st, 2020 and intend to keep doing that constantly.\nWhat you'll have inside this are news extracted from the following sources:\n\nFoxbusiness.com\nYoutube.com\nCnet.com\nThe Verge\nNytimes.com\nRawstory.com\nInvestors.com\nWreg.com\nReuters\nKoin.com\nInc.com\nCNBC, Nj.com\nWmtw.com\nNbcdfw.com\nBloomberg\nWowt.com\nBbc.com\n\nFor every 20-minute interval, a script checks for new headlines on these sources and add'em into a database. This CSV file is generated from that.\nI intend to update this dataset every day if I can (and if the machine I run this script is up).",
    "format": "arff",
    "uploader": "Onur Yildirim",
    "uploader_id": 30126,
    "visibility": "public",
    "creator": null,
    "contributor": null,
    "date": "2022-03-23 13:33:20",
    "update_comment": null,
    "last_update": "2022-03-23 13:33:20",
    "licence": "GPL 2",
    "status": "active",
    "error_message": null,
    "url": "https:\/\/www.openml.org\/data\/download\/22102347\/dataset",
    "default_target_attribute": null,
    "row_id_attribute": null,
    "ignore_attribute": "\"id\"",
    "runs": 0,
    "suggest": {
        "input": [
            "Multipurpose-World-News-Dataset",
            "Content This is a dataset I started building for my future personal projects, as I think this kind of data is quite hard to acquire for free and in short time. I started acquiring data on March 21st, 2020 and intend to keep doing that constantly. What you'll have inside this are news extracted from the following sources: Foxbusiness.com Youtube.com Cnet.com The Verge Nytimes.com Rawstory.com Investors.com Wreg.com Reuters Koin.com Inc.com CNBC, Nj.com Wmtw.com Nbcdfw.com Bloomberg Wowt.com Bbc.c "
        ],
        "weight": 5
    },
    "qualities": {
        "NumberOfInstances": 193279,
        "NumberOfFeatures": 4,
        "NumberOfClasses": null,
        "NumberOfMissingValues": 29954,
        "NumberOfInstancesWithMissingValues": 29954,
        "NumberOfNumericFeatures": 0,
        "NumberOfSymbolicFeatures": 0,
        "Dimensionality": 2.0695471313489827e-5,
        "PercentageOfNumericFeatures": 0,
        "MajorityClassPercentage": null,
        "PercentageOfSymbolicFeatures": 0,
        "MajorityClassSize": null,
        "MinorityClassPercentage": null,
        "MinorityClassSize": null,
        "NumberOfBinaryFeatures": 0,
        "PercentageOfBinaryFeatures": 0,
        "PercentageOfInstancesWithMissingValues": 15.497803693106857,
        "AutoCorrelation": null,
        "PercentageOfMissingValues": 3.874450923276714
    },
    "tags": [
        {
            "uploader": "38960",
            "tag": "Computer Systems"
        },
        {
            "uploader": "38960",
            "tag": "Machine Learning"
        }
    ],
    "features": [
        {
            "name": "id",
            "index": "0",
            "type": "numeric",
            "distinct": "193279",
            "missing": "0",
            "ignore": "1",
            "min": "1",
            "max": "193279",
            "mean": "96640",
            "stdev": "55795"
        },
        {
            "name": "timestamp",
            "index": "1",
            "type": "string",
            "distinct": "164190",
            "missing": "0"
        },
        {
            "name": "source",
            "index": "2",
            "type": "string",
            "distinct": "20",
            "missing": "0"
        },
        {
            "name": "title",
            "index": "3",
            "type": "string",
            "distinct": "193245",
            "missing": "11"
        },
        {
            "name": "description",
            "index": "4",
            "type": "string",
            "distinct": "120986",
            "missing": "29943"
        }
    ],
    "nr_of_issues": 0,
    "nr_of_downvotes": 0,
    "nr_of_likes": 0,
    "nr_of_downloads": 0,
    "total_downloads": 0,
    "reach": 0,
    "reuse": 0,
    "impact_of_reuse": 0,
    "reach_of_reuse": 0,
    "impact": 0
}