{ "data_id": "43691", "name": "Are-Two-Sentences-of-the-Same-Topic", "exact_name": "Are-Two-Sentences-of-the-Same-Topic", "version": 1, "version_label": "v1.0", "description": "Do two sentences come from the same article? We randomly sampled sentences from across Wikipedia. Some sentences came from the same articles, others do not. \nSentences from the Same Article\nThese two sentences are from the same article.\n\nThere were 2,788 housing units at an average density of 4 per squaremile (2\/km). \nIt is also home to the Oklahoma State Reformatory, located in Granite.\n\nSo are these:\n\nMonument of the Judiciary Citadel of Salerno, near the Colle Bellara. \nThe La Carnale Castle got his name from a medieval battle against the Arabs and is part of a sport complex (with pool, tennis courts and hockey).\n\nAs are these:\n\nThe idea of Haar measure is to take a sort of limit of as becomes smaller to make it additive on all pairs of disjoint compact sets, though it first has to be normalized so that the limit is not just infinity. \nWhen left and right Haar measures differ, the right measure is usually preferred as a prior distribution.\n\nSentences from Different Articles\nThese two sentences are from different articles:\n\nUS Open womens doubles champion France Ranked world No. \nThe average household size was 2.72 and the average family size was 3.19.\n\nAs are these:\n\nThe initial goal of the WordNet project was to build a lexical database that would be consistent with theories of human semantic memory developed in the late 1960s. \nMales had a median income of 25,625 versus 20,515 for females.\n\nThese are also different:\n\nMeanwhile, Western foods which are rich in fat, salt, sugar, and refined starches are also imported into countries. \nAccording to the United States Census Bureau, the CDP has a total area of , of which is land and (3.61) is water.\n\nDisclaimer\nPlease note, we attempted to remove any data sampled that includes controversial words or hate speech. However, such language is present in Wikipedia, so some such material may be present in this dataset. Due to the size of this dataset, it was not possible to have a human being audit each sentence.", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 07:14:00", "update_comment": null, "last_update": "2022-03-24 07:14:00", "licence": "CC BY-NC-SA 4.0", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102516\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"id\"", "runs": 0, "suggest": { "input": [ "Are-Two-Sentences-of-the-Same-Topic", "Do two sentences come from the same article? We randomly sampled sentences from across Wikipedia. Some sentences came from the same articles, others do not. Sentences from the Same Article These two sentences are from the same article. There were 2,788 housing units at an average density of 4 per squaremile (2\/km). It is also home to the Oklahoma State Reformatory, located in Granite. So are these: Monument of the Judiciary Citadel of Salerno, near the Colle Bellara. The La Carnale Castle got hi " ], "weight": 5 }, "qualities": { "NumberOfInstances": 129156, "NumberOfFeatures": 3, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 1, "NumberOfSymbolicFeatures": 0, "Dimensionality": 2.3227724612097e-5, "PercentageOfNumericFeatures": 33.33333333333333, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Transportation" } ], "features": [ { "name": "id", "index": "0", "type": "numeric", "distinct": "129156", "missing": "0", "ignore": "1", "min": "1", "max": "129156", "mean": "64579", "stdev": "37284" }, { "name": "sent1", "index": "1", "type": "string", "distinct": "102604", "missing": "0" }, { "name": "sent2", "index": "2", "type": "string", "distinct": "102694", "missing": "0" }, { "name": "same_source", "index": "3", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }