{ "data_id": "43683", "name": "WebMD-Drug-Reviews-Dataset", "exact_name": "WebMD-Drug-Reviews-Dataset", "version": 1, "version_label": "v1.0", "description": "Context\nThe dataset provides user reviews on specific drugs along with related conditions, side effects, age, sex, and ratings reflecting overall patient satisfaction.\nContent\nData was acquired by scraping WebMD site. There are around 0.36 million rows of unique reviews and is updated till Mar 2020.\nInspiration\nThis dataset intended to answer following questions:\nI. Identifying the condition of the patient based on drug reviews?\nII. How to predict drug rating based on patients reviews?\nIII. How to visualize drug rating, kind of drugs, types of conditions a patient can have, sentiments based on reviews", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 07:12:13", "update_comment": null, "last_update": "2022-03-24 07:12:13", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102508\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"DrugId\"", "runs": 0, "suggest": { "input": [ "WebMD-Drug-Reviews-Dataset", "Context The dataset provides user reviews on specific drugs along with related conditions, side effects, age, sex, and ratings reflecting overall patient satisfaction. Content Data was acquired by scraping WebMD site. There are around 0.36 million rows of unique reviews and is updated till Mar 2020. Inspiration This dataset intended to answer following questions: I. Identifying the condition of the patient based on drug reviews? II. How to predict drug rating based on patients reviews? III. How " ], "weight": 5 }, "qualities": { "NumberOfInstances": 362806, "NumberOfFeatures": 11, "NumberOfClasses": null, "NumberOfMissingValues": 42, "NumberOfInstancesWithMissingValues": 42, "NumberOfNumericFeatures": 4, "NumberOfSymbolicFeatures": 0, "Dimensionality": 3.031923397077226e-5, "PercentageOfNumericFeatures": 36.36363636363637, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0.011576434788840316, "AutoCorrelation": null, "PercentageOfMissingValues": 0.0010524031626218471 }, "tags": [ { "uploader": "38960", "tag": "Culture" }, { "uploader": "38960", "tag": "Geography" } ], "features": [ { "name": "Age", "index": "0", "type": "string", "distinct": "12", "missing": "0" }, { "name": "Condition", "index": "1", "type": "string", "distinct": "1806", "missing": "0" }, { "name": "Date", "index": "2", "type": "string", "distinct": "4524", "missing": "0" }, { "name": "Drug", "index": "3", "type": "string", "distinct": "7093", "missing": "0" }, { "name": "DrugId", "index": "4", "type": "numeric", "distinct": "6572", "missing": "0", "ignore": "1", "min": "1", "max": "178546", "mean": "36879", "stdev": "52624" }, { "name": "EaseofUse", "index": "5", "type": "numeric", "distinct": "7", "missing": "0", "min": "1", "max": "10", "mean": "4", "stdev": "1" }, { "name": "Effectiveness", "index": "6", "type": "numeric", "distinct": "7", "missing": "0", "min": "1", "max": "10", "mean": "4", "stdev": "1" }, { "name": "Reviews", "index": "7", "type": "string", "distinct": "250164", "missing": "42" }, { "name": "Satisfaction", "index": "8", "type": "numeric", "distinct": "7", "missing": "0", "min": "1", "max": "10", "mean": "3", "stdev": "2" }, { "name": "Sex", "index": "9", "type": "string", "distinct": "3", "missing": "0" }, { "name": "Sides", "index": "10", "type": "string", "distinct": "1651", "missing": "0" }, { "name": "UsefulCount", "index": "11", "type": "numeric", "distinct": "148", "missing": "0", "min": "0", "max": "255", "mean": "7", "stdev": "9" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }