{ "data_id": "43622", "name": "Binary-Dataset-of-Phishing-and-Legitimate-URLs", "exact_name": "Binary-Dataset-of-Phishing-and-Legitimate-URLs", "version": 1, "version_label": "v1.0", "description": "Description\nThe data set is provided csv file which provides the following resources that can be used as inputs for model building :\nA collection of website URLs for 11001 websites. Each sample has 15 website parameters and a class label identifying it as a phishing website or not (0 or 1).\nIf URLs is Phished then label is 0 and for legitimate label is 1\nThe data set also serves as an input for project scoping and tries to specify the functional and non-functional requirements for it.", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-24 00:42:33", "update_comment": null, "last_update": "2022-03-24 00:42:33", "licence": "Attribution 4.0 International (CC BY 4.0)", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102447\/dataset", "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": null, "runs": 0, "suggest": { "input": [ "Binary-Dataset-of-Phishing-and-Legitimate-URLs", "Description The data set is provided csv file which provides the following resources that can be used as inputs for model building : A collection of website URLs for 11001 websites. Each sample has 15 website parameters and a class label identifying it as a phishing website or not (0 or 1). If URLs is Phished then label is 0 and for legitimate label is 1 The data set also serves as an input for project scoping and tries to specify the functional and non-functional requirements for it. \n"
], "weight": 5 }, "qualities": { "NumberOfInstances": 11000, "NumberOfFeatures": 15, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 15, "NumberOfSymbolicFeatures": 0, "Dimensionality": 0.0013636363636363637, "PercentageOfNumericFeatures": 100, "MajorityClassPercentage": null, "PercentageOfSymbolicFeatures": 0, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "AutoCorrelation": null, "PercentageOfMissingValues": 0 }, "tags": [ { "uploader": "38960", "tag": "Computer Systems" }, { "uploader": "38960", "tag": "Machine Learning" } ], "features": [ { "name": "whois_regDate", "index": "0", "type": "numeric", "distinct": "4038", "missing": "0", "min": "-1", "max": "12915", "mean": "3051", "stdev": "3206" }, { "name": "whois_expDate", "index": "1", "type": "numeric", "distinct": "1872", "missing": "0", "min": "-327", "max": "31868", "mean": "463", "stdev": "756" }, { "name": "whois_updatedDate", "index": "2", "type": "numeric", "distinct": "1146", "missing": "0", "min": "-1", "max": "3685", "mean": "251", "stdev": "391" }, { "name": "dot_count", "index": "3", "type": "numeric", "distinct": "19", "missing": "0", "min": "1", "max": "28", "mean": "2", "stdev": "2" }, { "name": "url_len", "index": "4", "type": "numeric", "distinct": "347", "missing": "0", "min": "4", "max": "2123", "mean": "49", "stdev": "77" }, { "name": "digit_count", "index": "5", "type": "numeric", "distinct": "128", "missing": "0", "min": "0", "max": "702", "mean": "6", "stdev": "18" }, { "name": "special_count", "index": "6", "type": "numeric", "distinct": "35", "missing": "0", "min": "0", "max": "59", "mean": "1", "stdev": "4" }, { "name": "hyphen_count", "index": "7", "type": "numeric", "distinct": "32", "missing": "0", "min": "0", "max": "32", "mean": "1", "stdev": "3" }, { "name":
"double_slash", "index": "8", "type": "numeric", "distinct": "6", "missing": "0", "min": "0", "max": "6", "mean": "1", "stdev": "1" }, { "name": "single_slash", "index": "9", "type": "numeric", "distinct": "25", "missing": "0", "min": "0", "max": "25", "mean": "2", "stdev": "3" }, { "name": "at_the_rate", "index": "10", "type": "numeric", "distinct": "4", "missing": "0", "min": "0", "max": "3", "mean": "0", "stdev": "0" }, { "name": "protocol", "index": "11", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "protocol_count", "index": "12", "type": "numeric", "distinct": "6", "missing": "0", "min": "0", "max": "6", "mean": "1", "stdev": "1" }, { "name": "web_traffic", "index": "13", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "0", "stdev": "0" }, { "name": "label", "index": "14", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "1" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }