{ "data_id": "43544", "name": "Harry-Potter-fanfiction-data", "exact_name": "Harry-Potter-fanfiction-data", "version": 1, "version_label": "v1.0", "description": "Context\nHuge Harry Potter fan. Wanted to collect fan-fiction data to make a dashboard and visualize it. Its in the works. \nContent\nI scraped this data from https:\/\/www.fanfiction.net\/book\/Harry-Potter\/ using requests and beautiful soup. The data is completely structured. The scraping code can be found at https:\/\/github.com\/nt03\/HarryPotter_fanfics\/tree\/master\/ffnet\nIt contains all HP Fanfic entries written between 2001-2019 in all available languages. The data doesn't contain the story itself but just the story blurb.\nAcknowledgements\nThe code is entirely mine. The thumbnail and banner are attributed to [Photo by Christian Wagner on Unsplash]\nInspiration\nYou can answer questions like 'which is the most popular pairing', which language has the most ffs written in it, what has been the general trend like since the last movie or book came out.", "format": "arff", "uploader": "Onur Yildirim", "uploader_id": 30126, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:48:13", "update_comment": null, "last_update": "2022-03-23 13:48:13", "licence": "CC0: Public Domain", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102369\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"Updated\"", "runs": 0, "suggest": { "input": [ "Harry-Potter-fanfiction-data", "Context Huge Harry Potter fan. Wanted to collect fan-fiction data to make a dashboard and visualize it. Its in the works. Content I scraped this data from https:\/\/www.fanfiction.net\/book\/Harry-Potter\/ using requests and beautiful soup. The data is completely structured. The scraping code can be found at https:\/\/github.com\/nt03\/HarryPotter_fanfics\/tree\/master\/ffnet It contains all HP Fanfic entries written between 2001-2019 in all available languages. The data doesn't contain the story itself but " ], "weight": 5 }, "qualities": { "NumberOfInstances": 648493, "NumberOfFeatures": 16, "NumberOfClasses": null, "NumberOfMissingValues": 647586, "NumberOfInstancesWithMissingValues": 374277, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 0, "Dimensionality": 2.4672587059536495e-5, "PercentageOfNumericFeatures": 12.5, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 57.71488666801338, "AutoCorrelation": null, "PercentageOfMissingValues": 6.241258579506641 }, "tags": [ { "uploader": "38960", "tag": "Machine Learning" }, { "uploader": "38960", "tag": "Mathematics" } ], "features": [ { "name": "Chapters", "index": "0", "type": "numeric", "distinct": "228", "missing": "0", "min": "1", "max": "542", "mean": "4", "stdev": "8" }, { "name": "Favs", "index": "1", "type": "string", "distinct": "3483", "missing": "72163" }, { "name": "Follows", "index": "2", "type": "string", "distinct": "3139", "missing": "143204" }, { "name": "Published", "index": "3", "type": "string", "distinct": "5502", "missing": "0" }, { "name": "Reviews", "index": "4", "type": "numeric", "distinct": "2458", "missing": "52189", "min": "1", "max": "38111", "mean": "37", "stdev": "203" }, { "name": "Updated", "index": "5", "type": "string", "distinct": "5535", "missing": "410228", "ignore": "1" }, { "name": "Words", "index": "6", "type": "string", "distinct": "68086", "missing": "0" }, { "name": "author", "index": "7", "type": "string", "distinct": "156280", "missing": "0" }, { "name": "characters", "index": "8", "type": "string", "distinct": "36517", "missing": "63040" }, { "name": "genre", "index": "9", "type": "string", "distinct": "403", "missing": "59927" }, { "name": "language", "index": "10", "type": "string", "distinct": "43", "missing": "0" }, { "name": "rating", "index": "11", "type": "string", "distinct": "4", "missing": "0" }, { "name": "story_link", "index": "12", "type": "string", "distinct": "648090", "missing": "0" }, { "name": "synopsis", "index": "13", "type": "string", "distinct": "647085", "missing": "37" }, { "name": "title", "index": "14", "type": "string", "distinct": "474188", "missing": "203" }, { "name": "published_mmyy", "index": "15", "type": "string", "distinct": "181", "missing": "0" }, { "name": "pairing", "index": "16", "type": "string", "distinct": "9991", "missing": "256823" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }