{ "data_id": "43353", "name": "Capitol-Riot-Tweets", "exact_name": "Capitol-Riot-Tweets", "version": 1, "version_label": "v1.0", "description": "A csv file with 80,000+ tweets from January 6th, 2021 -- the day of the capitol hill riots. Made using the Twitter Developer API + Tweepy.\nNowhere close to the size of the Parler data dumps, but anyone with NLP experience might be able to find something useful here.\n\ntweets have mentions, hyperlinks, emojis, and punctuation removed. All text is converted to lowercase.\nSome tweets have coordinates (if users had geotagging enabled).\nVerified users have their usernames included\n\"user location\" is the user's self reported location in their profile. Blank if it doesn't correspond to a US state (or DC)", "format": "arff", "uploader": "Elif Ceren Gok", "uploader_id": 30125, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:35:08", "update_comment": null, "last_update": "2022-03-23 12:35:08", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102178\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Capitol-Riot-Tweets", "A csv file with 80,000+ tweets from January 6th, 2021 -- the day of the capitol hill riots. Made using the Twitter Developer API + Tweepy. Nowhere close to the size of the Parler data dumps, but anyone with NLP experience might be able to find something useful here. tweets have mentions, hyperlinks, emojis, and punctuation removed. All text is converted to lowercase. Some tweets have coordinates (if users had geotagging enabled). Verified users have their usernames included \"user location\" is th " ], "weight": 5 }, "qualities": { "NumberOfInstances": 82309, "NumberOfFeatures": 14, "NumberOfClasses": null, "NumberOfMissingValues": 392323, "NumberOfInstancesWithMissingValues": 82296, "NumberOfNumericFeatures": 8, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.00017009075556743492, "PercentageOfNumericFeatures": 57.14285714285714, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 99.98420585841158, "AutoCorrelation": null, "PercentageOfMissingValues": 34.04618137575651 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "tweet_id", "index": "0", "type": "numeric", "distinct": "81987", "missing": "0", "min": "2147483647", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "text", "index": "1", "type": "string", "distinct": "39078", "missing": "1" }, { "name": "query", "index": "2", "type": "string", "distinct": "16", "missing": "0" }, { "name": "user_id", "index": "3", "type": "numeric", "distinct": "72442", "missing": "0", "min": "1585", "max": "2147483647", "mean": "2147483647", "stdev": "2147483647" }, { "name": "user_name", "index": "4", "type": "string", "distinct": "2071", "missing": "79831" }, { "name": "follower_count", "index": "5", "type": "numeric", "distinct": "10791", "missing": "0", "min": "0", "max": "52085817", "mean": "14763", "stdev": "372574" }, { "name": "user_tweet_count", "index": "6", "type": "numeric", "distinct": "41197", "missing": "0", "min": "1", "max": "5444440", "mean": "44784", "stdev": "104094" }, { "name": "likes", "index": "7", "type": "numeric", "distinct": "353", "missing": "0", "min": "0", "max": "1070310", "mean": "43", "stdev": "4791" }, { "name": "retweets", "index": "8", "type": "numeric", "distinct": "1702", "missing": "0", "min": "0", "max": "795857", "mean": "2645", "stdev": "8193" }, { "name": "location_name", "index": "9", "type": "string", "distinct": "207", "missing": "82024" }, { "name": "longitude", "index": "10", "type": "numeric", "distinct": "207", "missing": "82024", "min": "-179", "max": "0", "mean": "-91", "stdev": "16" }, { "name": "latitude", "index": "11", "type": "numeric", "distinct": "207", "missing": "82024", "min": "13", "max": "48", "mean": "37", "stdev": "6" }, { "name": "user_location", "index": "12", "type": "string", "distinct": "52", "missing": "66419" }, { "name": "date", "index": "13", "type": "string", "distinct": "1", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }