{
    "data_id": "43794",
    "name": "Tweets-with-keyword-lockdown-in-April-July-2020",
    "exact_name": "Tweets-with-keyword-lockdown-in-April-July-2020",
    "version": 1,
    "version_label": "v1.0",
    "description": "Context\nThis data was collected to be used with an academic project of mine. The project was about sentiment analysis of tweets during lockdown.\nContent\nI used the GetOldTweets3 (https:\/\/pypi.org\/project\/GetOldTweets3\/) python3 library to pull the tweets off Twitter. The tweets range between 1 April 2020 to 1 August 2020, which was the peak lockdown period in India. Tweets with duplicate text and NaN values and that was the only cleaning I did on the data.\nTotal rows of tweets: 95488\nColumns:\n\nIndex (be sure to use df = pandas.read_csv(\"tweets_lockdown.csv\", index_col=0))\nText - The text of the tweet\nDate - Date and time of tweet in datetime format\nRetweets - Number of retweets for the tweet\nFavorites - Favorites on the tweet\nMentions - Usernames mentioned in the tweets in  format\nHashTags - Hashtags present in the tweet in  format\n\n\"Top Tweets\" attribute was turned off while scraping.\nInspiration\nTwitter data gives us a lot of scope for data cleaning, text preprocessing, association rule mining, sentiment analysis and so on.",
    "format": "arff",
    "uploader": "Elif Ceren Gok",
    "uploader_id": 30125,
    "visibility": "public",
    "creator": null,
    "contributor": null,
    "date": "2022-03-24 10:33:50",
    "update_comment": null,
    "last_update": "2022-03-24 10:33:50",
    "licence": "CC0: Public Domain",
    "status": "active",
    "error_message": null,
    "url": "https:\/\/www.openml.org\/data\/download\/22102619\/dataset",
    "default_target_attribute": null,
    "row_id_attribute": null,
    "ignore_attribute": null,
    "runs": 0,
    "suggest": {
        "input": [
            "Tweets-with-keyword-lockdown-in-April-July-2020",
            "Context This data was collected to be used with an academic project of mine. The project was about sentiment analysis of tweets during lockdown. Content I used the GetOldTweets3 (https:\/\/pypi.org\/project\/GetOldTweets3\/) python3 library to pull the tweets off Twitter. The tweets range between 1 April 2020 to 1 August 2020, which was the peak lockdown period in India. Tweets with duplicate text and NaN values and that was the only cleaning I did on the data. Total rows of tweets: 95488 Columns: In "
        ],
        "weight": 5
    },
    "qualities": {
        "NumberOfInstances": 95488,
        "NumberOfFeatures": 7,
        "NumberOfClasses": null,
        "NumberOfMissingValues": 160244,
        "NumberOfInstancesWithMissingValues": 90899,
        "NumberOfNumericFeatures": 3,
        "NumberOfSymbolicFeatures": 0,
        "Dimensionality": 7.330764075067024e-5,
        "PercentageOfNumericFeatures": 42.857142857142854,
        "MajorityClassPercentage": null,
        "PercentageOfSymbolicFeatures": 0,
        "MajorityClassSize": null,
        "MinorityClassPercentage": null,
        "MinorityClassSize": null,
        "NumberOfBinaryFeatures": 0,
        "PercentageOfBinaryFeatures": 0,
        "PercentageOfInstancesWithMissingValues": 95.19416052278821,
        "AutoCorrelation": null,
        "PercentageOfMissingValues": 23.97369302949062
    },
    "tags": [
        {
            "uploader": "38960",
            "tag": "Life Science"
        },
        {
            "uploader": "38960",
            "tag": "Medicine"
        }
    ],
    "features": [
        {
            "name": "Unnamed:_0",
            "index": "0",
            "type": "numeric",
            "distinct": "95488",
            "missing": "0",
            "min": "0",
            "max": "95487",
            "mean": "47744",
            "stdev": "27565"
        },
        {
            "name": "Text",
            "index": "1",
            "type": "string",
            "distinct": "95344",
            "missing": "19"
        },
        {
            "name": "Date",
            "index": "2",
            "type": "string",
            "distinct": "58281",
            "missing": "0"
        },
        {
            "name": "Retweets",
            "index": "3",
            "type": "numeric",
            "distinct": "321",
            "missing": "0",
            "min": "0",
            "max": "4680",
            "mean": "2",
            "stdev": "39"
        },
        {
            "name": "Favorites",
            "index": "4",
            "type": "numeric",
            "distinct": "607",
            "missing": "0",
            "min": "0",
            "max": "23953",
            "mean": "10",
            "stdev": "193"
        },
        {
            "name": "Mentions",
            "index": "5",
            "type": "string",
            "distinct": "8729",
            "missing": "82588"
        },
        {
            "name": "HashTags",
            "index": "6",
            "type": "string",
            "distinct": "12795",
            "missing": "77637"
        }
    ],
    "nr_of_issues": 0,
    "nr_of_downvotes": 0,
    "nr_of_likes": 0,
    "nr_of_downloads": 0,
    "total_downloads": 0,
    "reach": 0,
    "reuse": 0,
    "impact_of_reuse": 0,
    "reach_of_reuse": 0,
    "impact": 0
}