{ "data_id": "44769", "name": "okcupid-stem_seed_1_nrows_2000_nclasses_10_ncols_100_stratify_True", "exact_name": "okcupid-stem_seed_1_nrows_2000_nclasses_10_ncols_100_stratify_True", "version": 1, "version_label": "c5e5fc39-5748-4778-92d4-fc697085b2c1", "description": "Subsampling of the dataset okcupid-stem (42734) with\n\nseed=1\nargs.nrows=2000\nargs.ncols=100\nargs.nclasses=10\nargs.no_stratify=True\nGenerated with the following source code:\n\n\n```python\n def subsample(\n self,\n seed: int,\n nrows_max: int = 2_000,\n ncols_max: int = 100,\n nclasses_max: int = 10,\n stratified: bool = True,\n ) -> Dataset:\n rng = np.random.default_rng(seed)\n\n x = self.x\n y = self.y\n\n # Uniformly sample\n classes = y.unique()\n if len(classes) > nclasses_max:\n vcs = y.value_counts()\n selected_classes = rng.choice(\n classes,\n size=nclasses_max,\n replace=False,\n p=vcs \/ sum(vcs),\n )\n\n # Select the indices where one of these classes is present\n idxs = y.index[y.isin(classes)]\n x = x.iloc[idxs]\n y = y.iloc[idxs]\n\n # Uniformly sample columns if required\n if len(x.columns) > ncols_max:\n columns_idxs = rng.choice(\n list(range(len(x.columns))), size=ncols_max, replace=False\n )\n sorted_column_idxs = sorted(columns_idxs)\n selected_columns = list(x.columns[sorted_column_idxs])\n x = x[selected_columns]\n else:\n sorted_column_idxs = list(range(len(x.columns)))\n\n if len(x) > nrows_max:\n # Stratify accordingly\n target_name = y.name\n data = pd.concat((x, y), axis=\"columns\")\n _, subset = train_test_split(\n data,\n test_size=nrows_max,\n stratify=data[target_name],\n shuffle=True,\n random_state=seed,\n )\n x = subset.drop(target_name, axis=\"columns\")\n y = subset[target_name]\n\n # We need to convert categorical columns to string for openml\n categorical_mask = [self.categorical_mask[i] for i in sorted_column_idxs]\n columns = list(x.columns)\n\n return Dataset(\n # Technically this is not the same but it's where it was derived from\n dataset=self.dataset,\n x=x,\n y=y,\n categorical_mask=categorical_mask,\n columns=columns,\n )\n```", "format": "arff", "uploader": "Eddie Bergman", "uploader_id": 32840, "visibility": "public", "creator": "\"Eddie Bergman\"", "contributor": null, "date": "2022-11-17 18:49:54", "update_comment": null, "last_update": "2022-11-17 18:49:54", "licence": "Public", "status": "active", "error_message": null, "url": "https:\/\/api.openml.org\/data\/download\/22111531\/dataset", "kaggle_url": null, "default_target_attribute": "job", "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "okcupid-stem_seed_1_nrows_2000_nclasses_10_ncols_100_stratify_True", "Subsampling of the dataset okcupid-stem (42734) with seed=1 args.nrows=2000 args.ncols=100 args.nclasses=10 args.no_stratify=True Generated with the following source code: ```python def subsample( self, seed: int, nrows_max: int = 2_000, ncols_max: int = 100, nclasses_max: int = 10, stratified: bool = True, ) -> Dataset: rng = np.random.default_rng(seed) x = self.x y = self.y # Uniformly sample classes = y.unique() if len(classes) > nclasses_max: vcs = y.value_counts() selected_classes = rng.cho " ], "weight": 5 }, "qualities": { "NumberOfInstances": 2000, "NumberOfFeatures": 20, "NumberOfClasses": 3, "NumberOfMissingValues": 6117, "NumberOfInstancesWithMissingValues": 1926, "NumberOfNumericFeatures": 2, "NumberOfSymbolicFeatures": 18, "PercentageOfBinaryFeatures": 5, "PercentageOfInstancesWithMissingValues": 96.3, "PercentageOfMissingValues": 15.2925, "AutoCorrelation": 0.5482741370685342, "PercentageOfNumericFeatures": 10, "Dimensionality": 0.01, "PercentageOfSymbolicFeatures": 90, "MajorityClassPercentage": 71.6, "MajorityClassSize": 1432, "MinorityClassPercentage": 9.6, "MinorityClassSize": 192, "NumberOfBinaryFeatures": 1 }, "tags": [], "features": [ { "name": "job", "index": "19", "type": "nominal", "distinct": "3", "missing": "0", "target": "1", "distr": [ [ "stem", "non_stem", "student" ], [ [ "376", "0", "0" ], [ "0", "1432", "0" ], [ "0", "0", "192" ] ] ] }, { "name": "age", "index": "0", "type": "numeric", "distinct": "52", "missing": "0", "min": "18", "max": "69", "mean": "33", "stdev": "10" }, { "name": "body_type", "index": "1", "type": "nominal", "distinct": "12", "missing": "155", "distr": [ [ "a little extra", "athletic", "average", "curvy", "fit", "full figured", "jacked", "overweight", "rather not say", "skinny", "thin", "used up" ], [ [ "18", "68", "15" ], [ "73", "325", "22" ], [ "90", "333", "45" ], [ "3", "108", "16" ], [ "102", "296", "34" ], [ "4", "22", "3" ], [ "3", "13", "2" ], [ "6", "8", "4" ], [ "2", "5", "1" ], [ "8", "34", "12" ], [ "29", "108", "18" ], [ "4", "10", "1" ] ] ] }, { "name": "diet", "index": "2", "type": "nominal", "distinct": "16", "missing": "744", "distr": [ [ "anything", "halal", "kosher", "mostly anything", "mostly halal", "mostly kosher", "mostly other", "mostly vegan", "mostly vegetarian", "other", "strictly anything", "strictly halal", "strictly kosher", "strictly other", "strictly vegan", "strictly vegetarian", "vegan", "vegetarian" ], [ [ "37", "161", "21" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "110", "416", "59" ], [ "0", "2", "0" ], [ "0", "1", "0" ], [ "4", "31", "3" ], [ "4", "9", "3" ], [ "24", "116", "13" ], [ "2", "5", "2" ], [ "46", "100", "13" ], [ "1", "1", "0" ], [ "1", "0", "0" ], [ "5", "16", "0" ], [ "1", "4", "2" ], [ "5", "15", "2" ], [ "0", "0", "1" ], [ "3", "15", "2" ] ] ] }, { "name": "drinks", "index": "3", "type": "nominal", "distinct": "6", "missing": "62", "distr": [ [ "desperately", "not at all", "often", "rarely", "socially", "very often" ], [ [ "4", "4", "0" ], [ "14", "74", "10" ], [ "37", "106", "17" ], [ "37", "149", "27" ], [ "270", "1042", "133" ], [ "2", "10", "2" ] ] ] }, { "name": "drugs", "index": "4", "type": "nominal", "distinct": "3", "missing": "464", "distr": [ [ "never", "often", "sometimes" ], [ [ "226", "909", "117" ], [ "2", "12", "4" ], [ "46", "188", "32" ] ] ] }, { "name": "education", "index": "5", "type": "nominal", "distinct": "29", "missing": "147", "distr": [ [ "college\/university", "dropped out of college\/university", "dropped out of high school", "dropped out of law school", "dropped out of masters program", "dropped out of med school", "dropped out of ph.d program", "dropped out of space camp", "dropped out of two-year college", "graduated from college\/university", "graduated from high school", "graduated from law school", "graduated from masters program", "graduated from med school", "graduated from ph.d program", "graduated from space camp", "graduated from two-year college", "high school", "law school", "masters program", "med school", "ph.d program", "space camp", "two-year college", "working on college\/university", "working on high school", "working on law school", "working on masters program", "working on med school", "working on ph.d program", "working on space camp", "working on two-year college" ], [ [ "4", "17", "0" ], [ "11", "27", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "4", "5", "0" ], [ "1", "0", "0" ], [ "2", "4", "0" ], [ "6", "12", "1" ], [ "1", "4", "0" ], [ "189", "603", "18" ], [ "3", "52", "9" ], [ "1", "50", "2" ], [ "75", "226", "7" ], [ "0", "17", "0" ], [ "24", "22", "0" ], [ "4", "22", "0" ], [ "3", "35", "5" ], [ "0", "4", "0" ], [ "0", "0", "0" ], [ "1", "2", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "2", "0" ], [ "0", "7", "0" ], [ "10", "108", "83" ], [ "0", "1", "0" ], [ "0", "5", "5" ], [ "6", "30", "17" ], [ "0", "3", "3" ], [ "7", "11", "20" ], [ "1", "14", "0" ], [ "1", "27", "17" ] ] ] }, { "name": "ethnicity", "index": "6", "type": "nominal", "distinct": "57", "missing": "164", "distr": [ [ "asian", "asian, black", "asian, black, hispanic \/ latin", "asian, black, hispanic \/ latin, other", "asian, black, hispanic \/ latin, white", "asian, black, hispanic \/ latin, white, other", "asian, black, indian", "asian, black, native american", "asian, black, native american, hispanic \/ latin", "asian, black, native american, hispanic \/ latin, white", "asian, black, native american, indian", "asian, black, native american, indian, hispanic \/ latin, white, other", "asian, black, native american, indian, pacific islander, hispanic \/ latin", "asian, black, native american, indian, pacific islander, white", "asian, black, native american, other", "asian, black, native american, pacific islander", "asian, black, native american, pacific islander, other", "asian, black, native american, pacific islander, white", "asian, black, native american, pacific islander, white, other", "asian, black, native american, white", "asian, black, native american, white, other", "asian, black, other", "asian, black, pacific islander", "asian, black, pacific islander, hispanic \/ latin", "asian, black, pacific islander, hispanic \/ latin, white", "asian, black, pacific islander, other", "asian, black, pacific islander, white", "asian, black, pacific islander, white, other", "asian, black, white", "asian, black, white, other", "asian, hispanic \/ latin", "asian, hispanic \/ latin, other", "asian, hispanic \/ latin, white", "asian, hispanic \/ latin, white, other", "asian, indian", "asian, indian, hispanic \/ latin", "asian, indian, hispanic \/ latin, other", "asian, indian, hispanic \/ latin, white", "asian, indian, other", "asian, indian, pacific islander", "asian, indian, pacific islander, hispanic \/ latin, white, other", "asian, indian, pacific islander, other", "asian, indian, white", "asian, indian, white, other", "asian, middle eastern", "asian, middle eastern, black", "asian, middle eastern, black, indian, pacific islander, hispanic \/ latin, white", "asian, middle eastern, black, native american, hispanic \/ latin, white", "asian, middle eastern, black, native american, indian, pacific islander, hispanic \/ latin", "asian, middle eastern, black, native american, indian, pacific islander, hispanic \/ latin, other", "asian, middle eastern, black, native american, indian, pacific islander, hispanic \/ latin, white", "asian, middle eastern, black, native american, indian, pacific islander, hispanic \/ latin, white, other", "asian, middle eastern, black, native american, pacific islander, hispanic \/ latin, white, other", "asian, middle eastern, black, pacific islander", "asian, middle eastern, black, pacific islander, hispanic \/ latin", "asian, middle eastern, black, pacific islander, hispanic \/ latin, white", "asian, middle eastern, black, white, other", "asian, middle eastern, hispanic \/ latin", "asian, middle eastern, hispanic \/ latin, white", "asian, middle eastern, hispanic \/ latin, white, other", "asian, middle eastern, indian", "asian, middle eastern, indian, hispanic \/ latin", "asian, middle eastern, indian, hispanic \/ latin, white, other", "asian, middle eastern, indian, other", "asian, middle eastern, native american, hispanic \/ latin, white", "asian, middle eastern, native american, indian, pacific islander, hispanic \/ latin, white", "asian, middle eastern, native american, pacific islander, other", "asian, middle eastern, native american, pacific islander, white, other", "asian, middle eastern, other", "asian, middle eastern, white", "asian, middle eastern, white, other", "asian, native american", "asian, native american, hispanic \/ latin", "asian, native american, hispanic \/ latin, other", "asian, native american, hispanic \/ latin, white", "asian, native american, hispanic \/ latin, white, other", "asian, native american, indian, pacific islander, hispanic \/ latin, white", "asian, native american, indian, pacific islander, hispanic \/ latin, white, other", "asian, native american, other", "asian, native american, pacific islander", "asian, native american, pacific islander, hispanic \/ latin, white", "asian, native american, pacific islander, hispanic \/ latin, white, other", "asian, native american, pacific islander, white", "asian, native american, pacific islander, white, other", "asian, native american, white", "asian, native american, white, other", "asian, other", "asian, pacific islander", "asian, pacific islander, hispanic \/ latin", "asian, pacific islander, hispanic \/ latin, other", "asian, pacific islander, hispanic \/ latin, white", "asian, pacific islander, hispanic \/ latin, white, other", "asian, pacific islander, other", "asian, pacific islander, white", "asian, pacific islander, white, other", "asian, white", "asian, white, other", "black", "black, hispanic \/ latin", "black, hispanic \/ latin, other", "black, hispanic \/ latin, white", "black, hispanic \/ latin, white, other", "black, indian", "black, indian, hispanic \/ latin", "black, indian, hispanic \/ latin, white", "black, indian, other", "black, indian, white", "black, indian, white, other", "black, native american", "black, native american, hispanic \/ latin", "black, native american, hispanic \/ latin, other", "black, native american, hispanic \/ latin, white", "black, native american, hispanic \/ latin, white, other", "black, native american, indian", "black, native american, indian, hispanic \/ latin, white, other", "black, native american, indian, other", "black, native american, indian, white, other", "black, native american, other", "black, native american, pacific islander", "black, native american, pacific islander, hispanic \/ latin, white", "black, native american, pacific islander, hispanic \/ latin, white, other", "black, native american, pacific islander, other", "black, native american, pacific islander, white", "black, native american, pacific islander, white, other", "black, native american, white", "black, native american, white, other", "black, other", "black, pacific islander", "black, pacific islander, hispanic \/ latin", "black, pacific islander, other", "black, pacific islander, white", "black, white", "black, white, other", "hispanic \/ latin", "hispanic \/ latin, other", "hispanic \/ latin, white", "hispanic \/ latin, white, other", "indian", "indian, hispanic \/ latin", "indian, hispanic \/ latin, other", "indian, hispanic \/ latin, white", "indian, hispanic \/ latin, white, other", "indian, other", "indian, pacific islander", "indian, pacific islander, hispanic \/ latin, white", "indian, white", "indian, white, other", "middle eastern", "middle eastern, black", "middle eastern, black, hispanic \/ latin", "middle eastern, black, native american, hispanic \/ latin, white", "middle eastern, black, native american, indian", "middle eastern, black, native american, indian, hispanic \/ latin, white", "middle eastern, black, native american, indian, pacific islander, hispanic \/ latin, white", "middle eastern, black, native american, indian, pacific islander, hispanic \/ latin, white, other", "middle eastern, black, native american, indian, white, other", "middle eastern, black, native american, white", "middle eastern, black, native american, white, other", "middle eastern, black, other", "middle eastern, black, pacific islander, white", "middle eastern, black, white", "middle eastern, hispanic \/ latin", "middle eastern, hispanic \/ latin, other", "middle eastern, hispanic \/ latin, white", "middle eastern, hispanic \/ latin, white, other", "middle eastern, indian", "middle eastern, indian, other", "middle eastern, indian, white", "middle eastern, indian, white, other", "middle eastern, native american, hispanic \/ latin", "middle eastern, native american, hispanic \/ latin, white", "middle eastern, native american, hispanic \/ latin, white, other", "middle eastern, native american, white", "middle eastern, native american, white, other", "middle eastern, other", "middle eastern, pacific islander", "middle eastern, pacific islander, hispanic \/ latin", "middle eastern, pacific islander, other", "middle eastern, white", "middle eastern, white, other", "native american", "native american, hispanic \/ latin", "native american, hispanic \/ latin, other", "native american, hispanic \/ latin, white", "native american, hispanic \/ latin, white, other", "native american, indian", "native american, indian, pacific islander, hispanic \/ latin", "native american, indian, white", "native american, other", "native american, pacific islander", "native american, pacific islander, hispanic \/ latin", "native american, pacific islander, hispanic \/ latin, white", "native american, pacific islander, hispanic \/ latin, white, other", "native american, pacific islander, white", "native american, pacific islander, white, other", "native american, white", "native american, white, other", "other", "pacific islander", "pacific islander, hispanic \/ latin", "pacific islander, hispanic \/ latin, other", "pacific islander, hispanic \/ latin, white", "pacific islander, hispanic \/ latin, white, other", "pacific islander, other", "pacific islander, white", "pacific islander, white, other", "white", "white, other" ], [ [ "48", "127", "29" ], [ "1", "4", "1" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "2", "2" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "1", "0" ], [ "1", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "2", "7", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "2", "1" ], [ "0", "1", "0" ], [ "0", "1", "0" ], [ "6", "20", "5" ], [ "0", "3", "0" ], [ "3", "66", "8" ], [ "0", "3", "0" ], [ "0", "0", "0" ], [ "0", "3", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "4", "1" ], [ "0", "0", "1" ], [ "0", "4", "1" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "7", "0" ], [ "0", "0", "1" ], [ "10", "67", "16" ], [ "0", "4", "0" ], [ "6", "31", "7" ], [ "1", "3", "0" ], [ "14", "19", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "2", "7", "2" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "4", "9", "1" ], [ "0", "2", "0" ], [ "0", "1", "0" ], [ "0", "2", "0" ], [ "0", "0", "0" ], [ "0", "2", "1" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "3", "10", "3" ], [ "0", "1", "0" ], [ "5", "34", "4" ], [ "0", "8", "3" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "2", "0" ], [ "0", "5", "0" ], [ "0", "1", "0" ], [ "231", "819", "87" ], [ "1", "16", "1" ] ] ] }, { "name": "height", "index": "7", "type": "numeric", "distinct": "27", "missing": "0", "min": "3", "max": "93", "mean": "68", "stdev": "4" }, { "name": "income", "index": "8", "type": "nominal", "distinct": "12", "missing": "1551", "distr": [ [ "20000", "30000", "40000", "50000", "60000", "70000", "80000", "100000", "150000", "250000", "500000", "1000000" ], [ [ "3", "81", "36" ], [ "0", "38", "2" ], [ "4", "25", "0" ], [ "6", "31", "0" ], [ "9", "28", "1" ], [ "8", "22", "0" ], [ "12", "19", "0" ], [ "31", "47", "0" ], [ "4", "15", "0" ], [ "2", "3", "0" ], [ "2", "2", "1" ], [ "2", "13", "2" ] ] ] }, { "name": "location", "index": "9", "type": "nominal", "distinct": "59", "missing": "0", "distr": [ [ "alameda, california", "albany, california", "amsterdam, netherlands", "arcadia, california", "ashland, california", "atherton, california", "atlanta, georgia", "austin, texas", "bayshore, california", "bellingham, washington", "bellwood, illinois", "belmont, california", "belvedere tiburon, california", "benicia, california", "berkeley, california", "billings, montana", "boise, idaho", "bolinas, california", "bonaduz, switzerland", "boston, massachusetts", "boulder, colorado", "brea, california", "brisbane, california", "brooklyn, new york", "burlingame, california", "cambridge, massachusetts", "campbell, california", "canyon country, california", "canyon, california", "castro valley, california", "chicago, illinois", "chico, california", "cincinnati, ohio", "colma, california", "columbus, ohio", "concord, california", "cork, ireland", "corte madera, california", "costa mesa, california", "crockett, california", "daly city, california", "east palo alto, california", "edinburgh, united kingdom", "el cerrito, california", "el granada, california", "el sobrante, california", "emeryville, california", "fairfax, california", "forest knolls, california", "foster city, california", "freedom, california", "fremont, california", "glencove, california", "grand rapids, michigan", "granite bay, california", "green brae, california", "hacienda heights, california", "half moon bay, california", "hayward, california", "hercules, california", "hilarita, california", "hillsborough, california", "honolulu, hawaii", "irvine, california", "isla vista, california", "islip terrace, new york", "jackson, mississippi", "kansas city, missouri", "kassel, germany", "kensington, california", "kentfield, california", "kula, hawaii", "lafayette, california", "lagunitas, california", "lake orion, michigan", "larkspur, california", "las vegas, nevada", "leander, texas", "livingston, california", "london, united kingdom", "long beach, california", "long beach, new york", "longwood, florida", "los angeles, california", "los gatos, california", "madrid, spain", "magalia, california", "martinez, california", "menlo park, california", "miami, florida", "mill valley, california", "millbrae, california", "milpitas, california", "minneapolis, minnesota", "montara, california", "moraga, california", "moss beach, california", "mountain view, california", "muir beach, california", "murfreesboro, tennessee", "napa, california", "nevada city, california", "new york, new york", "nha trang, vietnam", "nicasio, california", "north hollywood, california", "novato, california", "oakland, california", "oceanview, california", "olema, california", "orinda, california", "ozone park, new york", "pacheco, california", "pacifica, california", "palo alto, california", "pasadena, california", "peoria, illinois", "petaluma, california", "philadelphia, pennsylvania", "phoenix, arizona", "piedmont, california", "pinole, california", "pleasant hill, california", "point richmond, california", "port costa, california", "portland, oregon", "providence, rhode island", "redwood city, california", "redwood shores, california", "richmond, california", "rochester, michigan", "rodeo, california", "rohnert park, california", "ross, california", "sacramento, california", "salt lake city, utah", "san anselmo, california", "san antonio, texas", "san bruno, california", "san carlos, california", "san diego, california", "san francisco, california", "san geronimo, california", "san jose, california", "san leandro, california", "san lorenzo, california", "san luis obispo, california", "san mateo, california", "san pablo, california", "san quentin, california", "san rafael, california", "santa ana, california", "santa cruz, california", "santa monica, california", "santa rosa, california", "sausalito, california", "seaside, california", "seattle, washington", "south lake tahoe, california", "south orange, new jersey", "south san francisco, california", "south wellfleet, massachusetts", "stanford, california", "stinson beach, california", "stockton, california", "stratford, connecticut", "studio city, california", "sunnyvale, california", "taunton, massachusetts", "tiburon, california", "tucson, arizona", "union city, california", "utica, michigan", "vacaville, california", "vallejo, california", "vancouver, british columbia, canada", "walnut creek, california", "washington, district of columbia", "waterford, california", "west oakland, california", "westlake, california", "woodacre, california", "woodbridge, virginia", "woodside, california" ], [ [ "5", "27", "3" ], [ "0", "3", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "2", "1", "0" ], [ "0", "0", "0" ], [ "0", "5", "2" ], [ "24", "87", "37" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "2", "8", "2" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "10", "3" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "3", "13", "3" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "3", "5", "1" ], [ "0", "2", "0" ], [ "1", "3", "0" ], [ "4", "17", "1" ], [ "1", "3", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "2", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "3", "0" ], [ "0", "0", "0" ], [ "0", "6", "0" ], [ "3", "17", "3" ], [ "1", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "2", "5", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "8", "1" ], [ "2", "11", "2" ], [ "0", "0", "0" ], [ "1", "6", "1" ], [ "4", "3", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "3", "1" ], [ "0", "0", "0" ], [ "8", "5", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "2", "8", "1" ], [ "23", "179", "13" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "4", "12", "0" ], [ "14", "25", "5" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "5", "1" ], [ "0", "12", "1" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "4", "12", "3" ], [ "0", "0", "0" ], [ "2", "11", "5" ], [ "0", "0", "0" ], [ "0", "2", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "4", "0" ], [ "0", "0", "0" ], [ "4", "8", "1" ], [ "3", "5", "0" ], [ "0", "0", "0" ], [ "222", "762", "76" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "2", "24", "3" ], [ "1", "2", "1" ], [ "0", "0", "0" ], [ "15", "24", "5" ], [ "1", "5", "1" ], [ "0", "0", "0" ], [ "2", "24", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "4", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "14", "0" ], [ "0", "0", "0" ], [ "1", "2", "8" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "1", "14", "2" ], [ "0", "0", "0" ], [ "1", "14", "1" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ], [ "0", "0", "0" ] ] ] }, { "name": "offspring", "index": "10", "type": "nominal", "distinct": "13", "missing": "1145", "distr": [ [ "doesn’t have kids", "doesn’t have kids, and doesn’t want any", "doesn’t have kids, but might want them", "doesn’t have kids, but wants them", "doesn’t want kids", "has a kid", "has a kid, and might want more", "has a kid, and wants more", "has a kid, but doesn’t want more", "has kids", "has kids, and might want more", "has kids, and wants more", "has kids, but doesn’t want more", "might want kids", "wants kids" ], [ [ "54", "178", "27" ], [ "10", "25", "8" ], [ "35", "101", "13" ], [ "18", "93", "7" ], [ "17", "84", "10" ], [ "9", "55", "3" ], [ "1", "7", "1" ], [ "0", "0", "0" ], [ "1", "4", "0" ], [ "6", "53", "0" ], [ "1", "2", "0" ], [ "0", "0", "0" ], [ "2", "18", "0" ], [ "1", "3", "1" ], [ "1", "6", "0" ] ] ] }, { "name": "orientation", "index": "11", "type": "nominal", "distinct": "3", "missing": "0", "distr": [ [ "bisexual", "gay", "straight" ], [ [ "12", "61", "19" ], [ "31", "143", "21" ], [ "333", "1228", "152" ] ] ] }, { "name": "pets", "index": "12", "type": "nominal", "distinct": "15", "missing": "589", "distr": [ [ "dislikes cats", "dislikes dogs", "dislikes dogs and dislikes cats", "dislikes dogs and has cats", "dislikes dogs and likes cats", "has cats", "has dogs", "has dogs and dislikes cats", "has dogs and has cats", "has dogs and likes cats", "likes cats", "likes dogs", "likes dogs and dislikes cats", "likes dogs and has cats", "likes dogs and likes cats" ], [ [ "1", "4", "1" ], [ "1", "1", "1" ], [ "1", "3", "1" ], [ "0", "2", "0" ], [ "3", "4", "1" ], [ "11", "33", "4" ], [ "22", "129", "10" ], [ "2", "21", "3" ], [ "1", "35", "6" ], [ "13", "65", "8" ], [ "5", "18", "4" ], [ "49", "187", "25" ], [ "14", "55", "8" ], [ "27", "115", "12" ], [ "126", "321", "58" ] ] ] }, { "name": "religion", "index": "13", "type": "nominal", "distinct": "40", "missing": "626", "distr": [ [ "agnosticism", "agnosticism and laughing about it", "agnosticism and somewhat serious about it", "agnosticism and very serious about it", "agnosticism but not too serious about it", "atheism", "atheism and laughing about it", "atheism and somewhat serious about it", "atheism and very serious about it", "atheism but not too serious about it", "buddhism", "buddhism and laughing about it", "buddhism and somewhat serious about it", "buddhism and very serious about it", "buddhism but not too serious about it", "catholicism", "catholicism and laughing about it", "catholicism and somewhat serious about it", "catholicism and very serious about it", "catholicism but not too serious about it", "christianity", "christianity and laughing about it", "christianity and somewhat serious about it", "christianity and very serious about it", "christianity but not too serious about it", "hinduism", "hinduism and laughing about it", "hinduism and somewhat serious about it", "hinduism and very serious about it", "hinduism but not too serious about it", "islam", "islam and laughing about it", "islam and somewhat serious about it", "islam and very serious about it", "islam but not too serious about it", "judaism", "judaism and laughing about it", "judaism and somewhat serious about it", "judaism and very serious about it", "judaism but not too serious about it", "other", "other and laughing about it", "other and somewhat serious about it", "other and very serious about it", "other but not too serious about it" ], [ [ "13", "62", "8" ], [ "30", "68", "11" ], [ "3", "11", "3" ], [ "5", "9", "0" ], [ "19", "57", "11" ], [ "20", "42", "10" ], [ "28", "47", "16" ], [ "14", "18", "9" ], [ "3", "10", "3" ], [ "20", "24", "4" ], [ "3", "9", "3" ], [ "4", "11", "0" ], [ "4", "9", "2" ], [ "0", "0", "0" ], [ "7", "17", "2" ], [ "6", "25", "3" ], [ "4", "25", "1" ], [ "1", "14", "1" ], [ "0", "0", "1" ], [ "11", "64", "9" ], [ "7", "50", "10" ], [ "1", "11", "2" ], [ "4", "29", "3" ], [ "2", "23", "0" ], [ "13", "48", "4" ], [ "1", "2", "0" ], [ "0", "0", "0" ], [ "2", "1", "0" ], [ "0", "0", "0" ], [ "3", "6", "0" ], [ "1", "0", "0" ], [ "0", "0", "0" ], [ "0", "1", "0" ], [ "0", "1", "0" ], [ "0", "0", "0" ], [ "2", "18", "1" ], [ "5", "17", "1" ], [ "1", "7", "0" ], [ "1", "0", "0" ], [ "4", "35", "4" ], [ "10", "81", "5" ], [ "10", "50", "3" ], [ "2", "17", "3" ], [ "1", "10", "0" ], [ "9", "34", "4" ] ] ] }, { "name": "sex", "index": "14", "type": "nominal", "distinct": "2", "missing": "0", "distr": [ [ "f", "m" ], [ [ "61", "645", "69" ], [ "315", "787", "123" ] ] ] }, { "name": "sign", "index": "15", "type": "nominal", "distinct": "46", "missing": "328", "distr": [ [ "aquarius", "aquarius and it matters a lot", "aquarius and it’s fun to think about", "aquarius but it doesn’t matter", "aries", "aries and it matters a lot", "aries and it’s fun to think about", "aries but it doesn’t matter", "cancer", "cancer and it matters a lot", "cancer and it’s fun to think about", "cancer but it doesn’t matter", "capricorn", "capricorn and it matters a lot", "capricorn and it’s fun to think about", "capricorn but it doesn’t matter", "gemini", "gemini and it matters a lot", "gemini and it’s fun to think about", "gemini but it doesn’t matter", "leo", "leo and it matters a lot", "leo and it’s fun to think about", "leo but it doesn’t matter", "libra", "libra and it matters a lot", "libra and it’s fun to think about", "libra but it doesn’t matter", "pisces", "pisces and it matters a lot", "pisces and it’s fun to think about", "pisces but it doesn’t matter", "sagittarius", "sagittarius and it matters a lot", "sagittarius and it’s fun to think about", "sagittarius but it doesn’t matter", "scorpio", "scorpio and it matters a lot", "scorpio and it’s fun to think about", "scorpio but it doesn’t matter", "taurus", "taurus and it matters a lot", "taurus and it’s fun to think about", "taurus but it doesn’t matter", "virgo", "virgo and it matters a lot", "virgo and it’s fun to think about", "virgo but it doesn’t matter" ], [ [ "6", "24", "1" ], [ "0", "3", "0" ], [ "5", "39", "3" ], [ "14", "29", "4" ], [ "5", "20", "4" ], [ "0", "0", "0" ], [ "6", "41", "10" ], [ "12", "25", "5" ], [ "5", "23", "4" ], [ "0", "4", "0" ], [ "10", "41", "5" ], [ "13", "31", "3" ], [ "3", "16", "4" ], [ "0", "2", "0" ], [ "3", "42", "7" ], [ "9", "35", "5" ], [ "6", "22", "2" ], [ "0", "1", "0" ], [ "9", "49", "7" ], [ "14", "33", "5" ], [ "4", "28", "3" ], [ "1", "1", "0" ], [ "10", "49", "5" ], [ "15", "32", "5" ], [ "5", "20", "2" ], [ "0", "1", "0" ], [ "13", "42", "7" ], [ "12", "36", "6" ], [ "3", "22", "1" ], [ "0", "0", "0" ], [ "10", "37", "7" ], [ "14", "26", "4" ], [ "3", "30", "3" ], [ "2", "0", "0" ], [ "6", "50", "6" ], [ "8", "33", "6" ], [ "6", "32", "0" ], [ "0", "3", "0" ], [ "6", "47", "2" ], [ "11", "27", "7" ], [ "1", "21", "4" ], [ "0", "1", "0" ], [ "8", "48", "5" ], [ "16", "33", "4" ], [ "2", "23", "6" ], [ "0", "1", "1" ], [ "12", "54", "5" ], [ "14", "31", "4" ] ] ] }, { "name": "smokes", "index": "16", "type": "nominal", "distinct": "5", "missing": "141", "distr": [ [ "no", "sometimes", "trying to quit", "when drinking", "yes" ], [ [ "309", "1066", "136" ], [ "21", "101", "17" ], [ "6", "42", "5" ], [ "11", "69", "15" ], [ "4", "46", "11" ] ] ] }, { "name": "speaks", "index": "17", "type": "nominal", "distinct": "553", "missing": "1", "distr": [] }, { "name": "status", "index": "18", "type": "nominal", "distinct": "5", "missing": "0", "distr": [ [ "available", "married", "seeing someone", "single", "unknown" ], [ [ "18", "44", "8" ], [ "2", "8", "1" ], [ "14", "48", "8" ], [ "341", "1331", "175" ], [ "1", "1", "0" ] ] ] } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }