{
    "data_id": "43542",
    "name": "7k-Books",
    "exact_name": "7k-Books",
    "version": 1,
    "version_label": "v1.0",
    "description": "Do we really need another dataset of books?\nMy initial plan was to build a toy example for a recommender system article I was writing. After a bit of googling, I found a few datasets. Sadly, most of them had some issues that made them unusable for me (e.g., missing description of the book, a mix of different languages but no column to specify the language per row or weird delimiters). \nSo I decided to make a dataset that would match my purposes.\nFirst, I got ISBNs from Soumik's Goodreads-books dataset. Using those identifiers, I crawled the Google Books API to extract the books' information.\nThen, I merged those results with some of the original columns from the dataset and after some cleaning I got the dataset you see here.\nWhat can I do with this?\nDifferent Exploratory Data Analysis, clustering of books by topics\/category, content-based recommendation engine using different fields from the book's description. \nWhy is this dataset smaller than Soumik's Goodreads-books?\nMany of the ISBNs of that dataset did not return valid results from the Google Books API. I plan to update this in the future, using more fields (e.g., title, author) in the API requests, as to have a bigger dataset.\nWhat did you use to build this dataset?\nCheck out the repository here Google Books Crawler\nAcknowledgements\nThis dataset relied heavily on Soumik's Goodreads-books dataset.",
    "format": "arff",
    "uploader": "Onur Yildirim",
    "uploader_id": 30126,
    "visibility": "public",
    "creator": null,
    "contributor": null,
    "date": "2022-03-23 13:46:08",
    "update_comment": null,
    "last_update": "2022-03-23 13:46:08",
    "licence": "CC0: Public Domain",
    "status": "active",
    "error_message": null,
    "url": "https:\/\/www.openml.org\/data\/download\/22102367\/dataset",
    "default_target_attribute": null,
    "row_id_attribute": null,
    "ignore_attribute": null,
    "runs": 0,
    "suggest": {
        "input": [
            "7k-Books",
            "Do we really need another dataset of books? My initial plan was to build a toy example for a recommender system article I was writing. After a bit of googling, I found a few datasets. Sadly, most of them had some issues that made them unusable for me (e.g, missing description of the book, a mix of different languages but no column to specify the language per row or weird delimiters). So I decided to make a dataset that would match my purposes. First, I got ISBNs from Soumik's Goodreads-books dat "
        ],
        "weight": 5
    },
    "qualities": {
        "NumberOfInstances": 6810,
        "NumberOfFeatures": 12,
        "NumberOfClasses": null,
        "NumberOfMissingValues": 5336,
        "NumberOfInstancesWithMissingValues": 4630,
        "NumberOfNumericFeatures": 5,
        "NumberOfSymbolicFeatures": 0,
        "Dimensionality": 0.001762114537444934,
        "PercentageOfNumericFeatures": 41.66666666666667,
        "MajorityClassPercentage": null,
        "PercentageOfSymbolicFeatures": 0,
        "MajorityClassSize": null,
        "MinorityClassPercentage": null,
        "MinorityClassSize": null,
        "NumberOfBinaryFeatures": 0,
        "PercentageOfBinaryFeatures": 0,
        "PercentageOfInstancesWithMissingValues": 67.98825256975036,
        "AutoCorrelation": null,
        "PercentageOfMissingValues": 6.529613313754283
    },
    "tags": [
        {
            "uploader": "38960",
            "tag": "Machine Learning"
        },
        {
            "uploader": "38960",
            "tag": "Mathematics"
        }
    ],
    "features": [
        {
            "name": "isbn13",
            "index": "0",
            "type": "numeric",
            "distinct": "6810",
            "missing": "0",
            "min": "2147483647",
            "max": "2147483647",
            "mean": "2147483647",
            "stdev": "606891057"
        },
        {
            "name": "isbn10",
            "index": "1",
            "type": "string",
            "distinct": "6810",
            "missing": "0"
        },
        {
            "name": "title",
            "index": "2",
            "type": "string",
            "distinct": "6394",
            "missing": "4"
        },
        {
            "name": "subtitle",
            "index": "3",
            "type": "string",
            "distinct": "2009",
            "missing": "4429"
        },
        {
            "name": "authors",
            "index": "4",
            "type": "string",
            "distinct": "3775",
            "missing": "77"
        },
        {
            "name": "categories",
            "index": "5",
            "type": "string",
            "distinct": "567",
            "missing": "99"
        },
        {
            "name": "thumbnail",
            "index": "6",
            "type": "string",
            "distinct": "6481",
            "missing": "329"
        },
        {
            "name": "description",
            "index": "7",
            "type": "string",
            "distinct": "6473",
            "missing": "263"
        },
        {
            "name": "published_year",
            "index": "8",
            "type": "numeric",
            "distinct": "94",
            "missing": "6",
            "min": "1853",
            "max": "2019",
            "mean": "1999",
            "stdev": "10"
        },
        {
            "name": "average_rating",
            "index": "9",
            "type": "numeric",
            "distinct": "200",
            "missing": "43",
            "min": "0",
            "max": "5",
            "mean": "4",
            "stdev": "0"
        },
        {
            "name": "num_pages",
            "index": "10",
            "type": "numeric",
            "distinct": "915",
            "missing": "43",
            "min": "0",
            "max": "3342",
            "mean": "348",
            "stdev": "242"
        },
        {
            "name": "ratings_count",
            "index": "11",
            "type": "numeric",
            "distinct": "3881",
            "missing": "43",
            "min": "0",
            "max": "5629932",
            "mean": "21069",
            "stdev": "137621"
        }
    ],
    "nr_of_issues": 0,
    "nr_of_downvotes": 0,
    "nr_of_likes": 0,
    "nr_of_downloads": 0,
    "total_downloads": 0,
    "reach": 0,
    "reuse": 0,
    "impact_of_reuse": 0,
    "reach_of_reuse": 0,
    "impact": 0
}