{ "data_id": "45714", "name": "PriceRunner", "exact_name": "PriceRunner", "version": 3, "version_label": "1.0", "description": "These datasets originate from PriceRunner, a popular product comparison platform. They contain product-related information including product IDs, titles, and categories. It can be used for numerous tasks, such as classification, clustering, record linkage, etc.\n\nColumn description:\n * Product ID\n * Product Title as it appears in the respective product comparison platform (lower case and with punctuation removed)\n * Vendor ID: the ID of the electronic store that provides the product.\n * Cluster ID: the ID of the cluster that the product belongs to. Useful for entity matching and clustering tasks.\n * Cluster Label: The title of the aforementioned cluster.\n * Category ID: the ID of the category that the product belongs to. Useful for classification and categorization tasks.\n * Category Label: The title of the aforementioned category.\n\nCitations:\n * L. Akritidis, A. Fevgas, P. Bozanis, C. Makris, \"A Self-Verifying Clustering Approach to Unsupervised Matching of Product Titles\", Artificial Intelligence Review (Springer), pp. 1-44, 2020.\n * L. Akritidis, P. Bozanis, \"Effective Unsupervised Matching of Product Titles with k-Combinations and Permutations\", In Proceedings of the 14th IEEE International Conference on Innovations in Intelligent Systems and Applications (INISTA), pp. 1-10, 2018.\n * L. Akritidis, A. Fevgas, P. Bozanis, \"Effective Product Categorization with Importance Scores and Morphological Analysis of the Titles\", In Proceedings of the 30th IEEE International Conference on Tools with Artificial Intelligence (ICTAI), pp. 
213-220, 2018.", "format": "arff", "uploader": "Leonidas Akritidis", "uploader_id": 38834, "visibility": "public", "creator": "\"Leonidas Akritidis\"", "contributor": null, "date": "2024-01-02 00:20:09", "update_comment": null, "last_update": "2024-01-02 00:20:09", "licence": "BSD", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22117225\/dataset", "default_target_attribute": "category_label", "row_id_attribute": null, "ignore_attribute": "\"id\"", "runs": 0, "suggest": { "input": [ "PriceRunner", "These datasets originate from PriceRunner, a popular product comparison platform. They contain product-related information including product IDs, titles, and categories. It can be used for numerous tasks, such as classification, clustering, record linkage, etc. Column description: * Product ID * Product Title as it appears in the respective product comparison platform (lower case and with punctuation removed) * Vendor ID: the ID of the electronic store that provides the product. 
* Cluster ID: th " ], "weight": 5 }, "qualities": { "NumberOfInstances": 35300, "NumberOfFeatures": 6, "NumberOfClasses": 10, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 3, "NumberOfSymbolicFeatures": 2, "AutoCorrelation": 0.9997450352701209, "PercentageOfMissingValues": 0, "Dimensionality": 0.00016997167138810198, "PercentageOfNumericFeatures": 50, "MajorityClassPercentage": 15.583569405099151, "PercentageOfSymbolicFeatures": 33.33333333333333, "MajorityClassSize": 5501, "MinorityClassPercentage": 6.266288951841361, "MinorityClassSize": 2212, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0 }, "tags": [], "features": [ { "name": "category_label", "index": "6", "type": "nominal", "distinct": "10", "missing": "0", "target": "1", "distr": [ [ "CPUs", "Digital Cameras", "Dishwashers", "Freezers", "Fridge Freezers", "Fridges", "Microwaves", "Mobile Phones", "TVs", "Washing Machines" ], [ [ "3860", "0", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "2694", "0", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "3424", "0", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "2212", "0", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "5501", "0", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "3584", "0", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "2342", "0", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "4075", "0", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "3564", "0" ], [ "0", "0", "0", "0", "0", "0", "0", "0", "0", "4044" ] ] ] }, { "name": "id", "index": "0", "type": "numeric", "distinct": "35300", "missing": "0", "ignore": "1", "min": "1", "max": "47358", "mean": "26156", "stdev": "13497" }, { "name": "product_title", "index": "1", "type": "string", "distinct": "30982", "missing": "0" }, { "name": "vendor_id", "index": "2", "type": "numeric", "distinct": "306", "missing": "0", "min": "1", "max": "371", "mean": "121", "stdev": "117" }, { 
"name": "cluster_id", "index": "3", "type": "numeric", "distinct": "13225", "missing": "0", "min": "1", "max": "47525", "mean": "30116", "stdev": "18408" }, { "name": "cluster_label", "index": "4", "type": "nominal", "distinct": "12841", "missing": "0", "distr": [] }, { "name": "category_id", "index": "5", "type": "numeric", "distinct": "10", "missing": "0", "min": "2612", "max": "2623", "mean": "2618", "stdev": "4" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }