{ "data_id": "45962", "name": "(Non-)depressive_tweet_data", "exact_name": "(Non-)depressive_tweet_data", "version": 2, "version_label": null, "description": "Description:\nThe dataset, named \"clean_tweet_Dec19ToDec20.csv,\" comprises a collection of tweets post-processed for clarity and analysis, spanning from December 2019 to December 2020. It is designed to provide insights into public sentiment during this period, capturing a unique blend of personal and societal narratives emerging from various global circumstances, including the COVID-19 pandemic. This dataset is structured into columns that include an index for unique identification, the raw text of each tweet, and a sentiment score.\n\nAttribute Description:\n- Index: A numerical identifier assigned to each tweet, e.g., 98655, 59794.\n- Text: Contains the cleaned and processed text of the tweet. This column captures a wide range of topics, from personal appliance purchases and mental health advice to discussions on electricity waste, unemployment, and even cryptocurrency-related dietary suggestions.\n- Sentiment: A numerical sentiment score assigned to each tweet, where 0 indicates a negative sentiment and 1 indicates a positive sentiment. This binary classification assists in sentiment analysis, offering a simplistic yet effective insight into the general mood of each tweet.\n\nUse Case:\nThis dataset can be instrumental for researchers and data scientists focusing on natural language processing (NLP), sentiment analysis, and trend spotting. It offers a rich resource for training machine learning models aimed at understanding public sentiment, detecting shifts in societal concerns or interests over time, and exploring the correlation between external events and public mood. Additionally, marketing professionals might leverage this dataset to gauge consumer sentiment, optimize brand communication strategies, and identify potential areas for product or service improvements based on public feedback.", "format": "arff", "uploader": "Iwo Godzwon", "uploader_id": 39999, "visibility": "public", "creator": "\"Mann S\"", "contributor": "\"None\"", "date": "2024-05-15 14:55:24", "update_comment": null, "last_update": "2024-05-15 14:55:24", "licence": "Attribution (CC BY)", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22120405\/dataset", "kaggle_url": null, "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "(Non-)depressive_tweet_data", "Description: The dataset, named \"clean_tweet_Dec19ToDec20.csv,\" comprises a collection of tweets post-processed for clarity and analysis, spanning from December 2019 to December 2020. It is designed to provide insights into public sentiment during this period, capturing a unique blend of personal and societal narratives emerging from various global circumstances, including the COVID-19 pandemic. This dataset is structured into columns that include an index for unique identification, the raw text " ], "weight": 5 }, "qualities": { "NumberOfInstances": 134348, "NumberOfFeatures": 3, "NumberOfClasses": null, "NumberOfMissingValues": 18, "NumberOfInstancesWithMissingValues": 18, "NumberOfNumericFeatures": 0, "NumberOfSymbolicFeatures": 1, "PercentageOfBinaryFeatures": 33.33333333333333, "PercentageOfInstancesWithMissingValues": 0.013398040908684907, "PercentageOfMissingValues": 0.004466013636228302, "AutoCorrelation": null, "PercentageOfNumericFeatures": 0, "Dimensionality": 2.2330068181141513e-5, "PercentageOfSymbolicFeatures": 33.33333333333333, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 1 }, "tags": [], "features": [ { "name": "Index", "index": "0", "type": "string", "distinct": "134348", "missing": "0" }, { "name": "text", "index": "1", "type": "string", "distinct": "124016", "missing": "18" }, { "name": "sentiment", "index": "2", "type": "nominal", "distinct": "2", "missing": "0", "distr": [] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }