malware_detection / binary2image.py
santialferez
first commit
e9d7935
raw
history blame contribute delete
No virus
2.33 kB
"""Binary to Image Converter."""
import os
from argparse import ArgumentParser
from PIL import Image
from tqdm import tqdm
def get_binary_data(filename):
    """Read a binary file and return its content as a list of byte values.

    The file is read with a single ``read()`` call instead of one byte at a
    time; iterating a ``bytes`` object already yields integers, so no
    per-byte ``ord`` conversion is needed.

    :param filename: path of the file to read
    :return: list of integers in the range 0-255, in file order (empty list
        for an empty file)
    """
    with open(filename, "rb") as file:
        return list(file.read())
def get_size(data_length):
    """Choose the image dimensions for a file of the given length.

    The width is picked from a fixed table of file-size ranges (source:
    "Malware images: visualization and automatic classification"). The
    height is the number of full rows plus one, so one extra row is added
    even when the length is an exact multiple of the width.

    :param data_length: number of bytes in the file
    :return: ``(width, height)`` tuple
    """
    kib = 2**10
    # (exclusive upper bound in KiB, image width), in ascending order.
    width_table = (
        (10, 32),
        (30, 64),
        (60, 128),
        (100, 256),
        (200, 384),
        (500, 512),
        (1000, 768),
    )
    # Width used when the file is at least as large as every bound above.
    width = 1024
    for limit_kib, candidate in width_table:
        if data_length < limit_kib * kib:
            width = candidate
            break
    return (width, data_length // width + 1)
def save_file(folder, filename, data, size):
    """Write pixel values to a grayscale PNG named after the source file.

    The output name is the base name of ``filename`` with its last
    extension replaced by ``.png``, placed inside ``folder``.

    :param folder: folder where the image will be saved
    :param filename: path of the binary file the data came from
    :param data: sequence of pixel values for the image
    :param size: image size as a ``(width, height)`` tuple
    """
    # Work out the destination path first: strip the directory part, then
    # drop the last extension.
    stem = os.path.splitext(os.path.basename(filename))[0]
    target = os.path.join(folder, stem + ".png")

    # "L" is Pillow's 8-bit single-channel (grayscale) mode.
    picture = Image.new("L", size)
    picture.putdata(data)
    picture.save(target)
if __name__ == "__main__":
    # Command line: <input_folder> <output_folder>
    parser = ArgumentParser(
        description="Transform all files in a folder into PNG images"
    )
    parser.add_argument("input_folder", help="Folder with the original files")
    parser.add_argument(
        "output_folder", help="Folder where the images will be saved"
    )
    args = parser.parse_args()

    # os.listdir() also returns sub-directories, which cannot be opened as
    # binary files; keep regular files only. Sorting makes the processing
    # order independent of the file system's listing order.
    candidates = (
        os.path.join(args.input_folder, entry)
        for entry in sorted(os.listdir(args.input_folder))
    )
    files = [path for path in candidates if os.path.isfile(path)]

    os.makedirs(args.output_folder, exist_ok=True)
    for bin_file in tqdm(files):
        greyscale_data = get_binary_data(bin_file)
        bin_size = get_size(len(greyscale_data))
        save_file(args.output_folder, bin_file, greyscale_data, bin_size)