{ "data_id": "43527", "name": "Malware-Analysis-Datasets-PE-Section-Headers", "exact_name": "Malware-Analysis-Datasets-PE-Section-Headers", "version": 1, "version_label": "v1.0", "description": "Introduction\nThis dataset is part of my PhD research on malware detection and classification using Deep Learning. It contains static analysis data (PE Section Headers of the .text, .code and CODE sections) extracted from the 'pe_sections' elements of Cuckoo Sandbox reports. PE malware examples were downloaded from virusshare.com. PE goodware examples were downloaded from portableapps.com and from Windows 7 x86 directories.\nFeatures\nColumn name: hash\nDescription: MD5 hash of the example\nContent: 32 bytes string\nColumn name: sizeofdata\nDescription: The size of the section on disk\nContent: Integer\nColumn name: virtualaddress\nDescription: Memory address of the first byte of the section relative to the image base\nContent: Integer\nColumn name: entropy\nDescription: Calculated entropy of the section\nContent: Float\nColumn name: virtualsize\nDescription: The size of the section when loaded into memory\nContent: Integer\nColumn name: malware\nDescription: Class\nContent: 0 (Goodware) or 1 (Malware)\nAcknowledgements\nThank you Cuckoo Sandbox for developing such an amazing dynamic analysis environment!\nThank you VirusShare! Because sharing is caring!\nCitations\nPlease refer to http:\/\/dx.doi.org\/10.21227\/2czh-es14", "format": "arff", "uploader": "Dustin Carrion", "uploader_id": 30123, "visibility": "public", "creator": null, "contributor": null, "date": "2022-03-23 13:37:02", "update_comment": null, "last_update": "2022-03-23 13:37:02", "licence": "Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)", "status": "active", "error_message": null, "url": "https:\/\/www.openml.org\/data\/download\/22102352\/dataset", "kaggle_url": null, "default_target_attribute": null, "row_id_attribute": null, "ignore_attribute": "\"hash\"", "runs": 0, "suggest": { "input": [ "Malware-Analysis-Datasets-PE-Section-Headers", "Introduction This dataset is part of my PhD research on malware detection and classification using Deep Learning. It contains static analysis data (PE Section Headers of the .text, .code and CODE sections) extracted from the 'pe_sections' elements of Cuckoo Sandbox reports. PE malware examples were downloaded from virusshare.com. PE goodware examples were downloaded from portableapps.com and from Windows 7 x86 directories. Features Column name: hash Description: MD5 hash of the example Content: " ], "weight": 5 }, "qualities": { "NumberOfInstances": 43293, "NumberOfFeatures": 5, "NumberOfClasses": null, "NumberOfMissingValues": 0, "NumberOfInstancesWithMissingValues": 0, "NumberOfNumericFeatures": 5, "NumberOfSymbolicFeatures": 0, "PercentageOfBinaryFeatures": 0, "PercentageOfInstancesWithMissingValues": 0, "PercentageOfMissingValues": 0, "AutoCorrelation": null, "PercentageOfNumericFeatures": 100, "Dimensionality": 0.000115492111888758, "PercentageOfSymbolicFeatures": 0, "MajorityClassPercentage": null, "MajorityClassSize": null, "MinorityClassPercentage": null, "MinorityClassSize": null, "NumberOfBinaryFeatures": 0 }, "tags": [], "features": [ { "name": "hash", "index": "0", "type": "string", "distinct": "43144", "missing": "0", "ignore": "1" }, { "name": "size_of_data", "index": "1", "type": "numeric", "distinct": "2659", "missing": "0", "min": "0", "max": "52962816", "mean": "637981", "stdev": "1173484" }, { "name": "virtual_address", "index": "2", "type": "numeric", "distinct": "129", "missing": "0", "min": "4096", "max": "13152256", "mean": "8032", "stdev": "83685" }, { "name": "entropy", "index": "3", "type": "numeric", "distinct": "17613", "missing": "0", "min": "0", "max": "8", "mean": "6", "stdev": "1" }, { "name": "virtual_size", "index": "4", "type": "numeric", "distinct": "11715", "missing": "0", "min": "5", "max": "52962523", "mean": "644611", "stdev": "1187790" }, { "name": "malware", "index": "5", "type": "numeric", "distinct": "2", "missing": "0", "min": "0", "max": "1", "mean": "1", "stdev": "0" } ], "nr_of_issues": 0, "nr_of_downvotes": 0, "nr_of_likes": 0, "nr_of_downloads": 0, "total_downloads": 0, "reach": 0, "reuse": 0, "impact_of_reuse": 0, "reach_of_reuse": 0, "impact": 0 }