{ "data_id": "43373", "name": "Reddit-WallStreetBets-Posts", "exact_name": "Reddit-WallStreetBets-Posts", "version": 1, "version_label": "v1.0", "description": "Context\nWallStreetBets (r\/wallstreetbets, also known as WSB), is a subreddit where participants discuss stock and option trading. It has become notable for its profane nature and allegations of users manipulating securities. \nRecently the community became mainstream again with its interest on GameStop shares. \nThe data might contain a small percent of harsh language, the posts were not filtered.\nContent\nReddit posts from subreddit WallStreetBets, downloaded from https:\/\/www.reddit.com\/r\/wallstreetbets\/ using praw (The Python Reddit API Wrapper).\nInspiration\nYou can use the data to:\n\nPerform sentiment analysis; \nIdentify discussion topics; \nFollow the trends (like appearance of keywords as GME, AMP, NOK and whatever other trends are actual in the data).", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 12:43:43", "update_comment": null, "last_update": "2022-03-23 12:43:43", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102198\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"id\"", "runs": 0, "suggest": { "input": [ "Reddit-WallStreetBets-Posts", "Context WallStreetBets (r\/wallstreetbets, also known as WSB), is a subreddit where participants discuss stock and option trading. It has become notable for its profane nature and allegations of users manipulating securities. Recently the community became mainstream again with its interest on GameStop shares. The data might contain a small percent of harsh language, the posts were not filtered. Content Reddit posts from subreddit WallStreetBets, downloaded from https:\/\/www.reddit.com\/r\/wallstreet " ], "weight": 5 }, "qualities": { "NumberOfInstances": 53187, "NumberOfFeatures": 7, "NumberOfClasses": null, "NumberOfMissingValues": 28655, "NumberOfInstancesWithMissingValues": 28543, "NumberOfNumericFeatures": 3, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0001316111079775133, "PercentageOfNumericFeatures": 42.857142857142854, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 53.665369357173745, "AutoCorrelation": null, "PercentageOfMissingValues": 7.696563875705395 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "title", "index": "0", "type": "string", "distinct": "51750", "missing": "130" }, { "name": "score", "index": "1", "type": "numeric", "distinct": "5210", "missing": "0", "min": "0", "max": "348241", "mean": "1382", "stdev": "7999" }, { "name": "id", "index": "2", "type": "string", "distinct": "53187", "missing": "0", "ignore": "1" }, { "name": "url", "index": "3", "type": "string", "distinct": "53172", "missing": "0" }, { "name": "comms_num", "index": "4", "type": "numeric", "distinct": "2045", "missing": "0", "min": "0", "max": "93268", "mean": "263", "stdev": "2533" }, { "name": "created", "index": "5", "type": "numeric", "distinct": "43460", "missing": "0", "min": "1601340416", "max": "1629095180", "mean": "1614581533", "stdev": "4173566" }, { "name": "body", "index": "6", "type": "string", "distinct": "24077", "missing": "28525" }, { "name": "timestamp", "index": "7", "type": "string", "distinct": "43460", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }