{ "data_id": "45282", "name": "AfriSenti", "exact_name": "AfriSenti", "version": 1, "version_label": "1.0", "description": "We introduce AfriSenti, which consists of 14 sentiment datasets of 110,000+ tweets in 14 African languages (Amharic, Algerian Arabic, Hausa, Igbo, Kinyarwanda, Moroccan Arabic, Mozambican Portuguese, Nigerian Pidgin, Oromo, Swahili, Tigrinya, Twi, Xitsonga, and Yorùbá) from four language families annotated by native speakers. The data was used in SemEval 2023 Task 12, the first Afro-centric SemEval shared task. We hope AfriSenti enables new work on under-represented languages. The dataset is available at https:\/\/github.com\/afrisenti-semeval\/afrisent-semeval-2023.", "format": "arff", "uploader": "Idris Abdulmumin", "uploader_id": 35734, "visibility": "public", "creator": "\"S. H. Muhammad, I. Abdulmumin, A. A. Ayele, N. Ousidhoum, D. I. Adelani, S. M. Yimam, I. S. Ahmad, et al.\"", "contributor": null, "date": "2023-05-16 16:47:59", "update_comment": null, "last_update": "2023-05-16 16:47:59", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22116250\/dataset", "default_target_attribute": "label", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "AfriSenti", "We introduce AfriSenti, which consists of 14 sentiment datasets of 110,000+ tweets in 14 African languages (Amharic, Algerian Arabic, Hausa, Igbo, Kinyarwanda, Moroccan Arabic, Mozambican Portuguese, Nigerian Pidgin, Oromo, Swahili, Tigrinya, Twi, Xitsonga, and Yorùbá) from four language families annotated by native speakers. The data was used in SemEval 2023 Task 12, the first Afro-centric SemEval shared task. We hope AfriSenti enables new work on under-represented languages. \nThe dataset is av " ], "weight": 5 }, "qualities": { "NumberOfInstances": 111720, "NumberOfFeatures": 4, "NumberOfClasses": 3, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 0, "NumberOfSymbolicFeatures": 2, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": 1, "PercentageOfMissingValues": 0, "Dimensionality": 3.580379520229144e-5, "PercentageOfNumericFeatures": 0, "MajorityClassPercentage": 35.04385964912281, "PercentageOfSymbolicFeatures": 50, "MajorityClassSize": 39151, "MinorityClassPercentage": 32.22789115646258, "MinorityClassSize": 36005, "NumberOfBinaryFeatures": 0 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "label", "index": "3", "type": "string", "distinct": "3", "missing": "0", "target": "1" }, { "name": "language", "index": "0", "type": "nominal", "distinct": "14", "missing": "0", "distr": [] }, { "name": "split", "index": "1", "type": "nominal", "distinct": "3", "missing": "0", "distr": [] }, { "name": "text", "index": "2", "type": "string", "distinct": "107887", "missing": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }