|
import os |
|
import sys |
|
from llama_cpp import Llama |
|
|
|
from util import create_prompt, is_bytecode_empty |
|
|
|
# Shared model instance: constructed once when this module is imported and
# reused by every decompile_pycb() call. The model path is relative, so it is
# resolved against the current working directory.
# Parameter notes follow the llama-cpp-python documentation -- verify against
# the installed version.
llm = Llama(
    model_path="sentient-simulations-pydecompiler-3.7-6.7b-v0.9-q8_0.gguf",
    n_ctx=16384,  # context window size, in tokens
    n_gpu_layers=-1,  # -1 requests offloading all layers to the GPU
    last_n_tokens_size=0,  # presumably disables the repeat-penalty history -- TODO confirm
)
|
|
|
|
|
def decompile_pycb(
    file_path: str,
    force_overwrite=False,
    max_prompt_tokens: int = 15000,
):
    """Decompile one bytecode listing with the LLM and write the result to disk.

    The output path is ``file_path`` with its last two characters removed,
    so ``foo.pycb`` is written to ``foo.py``.

    Args:
        file_path: Path to the text file containing the bytecode listing.
        force_overwrite: Currently unused; kept so existing callers keep
            working. An existing output file is always overwritten.
        max_prompt_tokens: Largest tokenized prompt that is sent to the
            model. The default of 15000 stays below the 16384-token context
            configured on ``llm``, leaving room for the generated source.

    Returns:
        The generated source code, followed by a ``# Finish Reason`` comment,
        after it has been written to the output path.
        ``"# Empty file"`` when ``is_bytecode_empty`` reports that the
        listing is empty (nothing is written in that case).
        ``None`` when the prompt is longer than ``max_prompt_tokens`` tokens
        (nothing is written in that case either).
    """
    source_code_path = file_path[:-2]

    # Read explicitly as UTF-8: the prompt is encoded as UTF-8 below, and the
    # platform default encoding is not guaranteed to match.
    with open(file_path, "r", encoding="utf-8") as bytecode_file:
        bytecode = bytecode_file.read()

    if is_bytecode_empty(bytecode):
        return "# Empty file"

    prompt = create_prompt(bytecode)

    # Count tokens up front so oversized inputs are skipped instead of being
    # sent to the model.
    tokens = llm.tokenize(text=prompt.encode("utf-8"), add_bos=True, special=True)
    if len(tokens) > max_prompt_tokens:
        return None

    output = llm(prompt, max_tokens=None, echo=False)
    result = output["choices"][0]
    # Tolerate a missing/None "text" entry rather than failing on .strip().
    source_code = (result.get("text") or "").strip()
    finish_reason = result.get("finish_reason")

    # Record why generation ended alongside the generated code.
    source_code += f"\n\n# Finish Reason: {finish_reason}\n"

    # Write as UTF-8 so non-ASCII characters in the generated code cannot
    # fail on platforms whose default encoding is narrower.
    with open(source_code_path, "w", encoding="utf-8") as source_code_file:
        source_code_file.write(source_code)

    return source_code
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: decompile a single listing, or every
    # not-yet-decompiled ".pycb" listing found under a directory tree.
    if len(sys.argv) < 2:
        # Fail with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <file.pycb | directory>")

    path = sys.argv[1]
    if os.path.isfile(path):
        # Single file: always decompile and echo the result.
        print(decompile_pycb(path))
    elif os.path.isdir(path):
        for root, _dirs, files in os.walk(path):
            for file in files:
                if not file.endswith(".pycb"):
                    continue
                # Skip listings whose output ("x.pycb" -> "x.py") already
                # exists, so an interrupted run can be resumed.
                if os.path.exists(os.path.join(root, file[:-2])):
                    continue
                file_path = os.path.join(root, file)
                # decompile_pycb returns None when the prompt is too long.
                if decompile_pycb(file_path):
                    print(f"Decompiled {file_path}")
    else:
        # Previously a bad path was ignored silently; report it instead.
        sys.exit(f"error: {path!r} is neither a file nor a directory")
|
|