File size: 1,736 Bytes
01f2fb9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import os
import sys
from llama_cpp import Llama

from util import create_prompt, is_bytecode_empty

# Load the GGUF decompiler model once at import time; decompile_pycb() uses
# this single module-level instance for tokenizing and for generation.
# NOTE(review): model_path is an absolute path under one user's home
# directory — confirm it exists (or make it configurable) on other machines.
llm = Llama(
    model_path="/home/guspuffy/projects/output-merged/GGUF/sentient-simulations-pydecompiler-3.7-6.7b-v0.9-q8_0.gguf",
    # presumably -1 offloads all layers to the GPU — confirm against the
    # llama-cpp-python documentation
    n_gpu_layers=-1,
    # presumably 0 disables the repetition-penalty look-back window — TODO confirm
    last_n_tokens_size=0,
    # Context size in tokens; decompile_pycb() skips prompts above 15000 tokens.
    n_ctx=16384,
)


def decompile_pycb(file_path: str, force_overwrite: bool = False):
    """Decompile one bytecode listing with the module-level model and save it.

    The output path is ``file_path`` with its last two characters removed, so
    ``foo.pycb`` is written to ``foo.py``.

    Args:
        file_path: Path to a text file holding the bytecode listing.
        force_overwrite: Currently unused; kept so existing callers keep
            working. An existing output file is always overwritten.

    Returns:
        ``"# Empty file"`` when ``is_bytecode_empty`` reports the listing as
        empty (nothing is written); ``None`` when the prompt is longer than
        15000 tokens (nothing is written); otherwise the generated source
        text, ending with a ``# Finish Reason: ...`` comment, which is also
        written to the output path.
    """
    source_code_path = file_path[:-2]

    # Read with an explicit encoding: the prompt is encoded as UTF-8 below, so
    # the input must not depend on the platform's locale default. The handle
    # is released right after the read instead of staying open during the
    # model call.
    with open(file_path, "r", encoding="utf-8") as bytecode_file:
        bytecode = bytecode_file.read()

    if is_bytecode_empty(bytecode):
        return "# Empty file"

    prompt = create_prompt(bytecode)

    # Skip over-long prompts; presumably this leaves headroom in the model's
    # context window for the generated output — TODO confirm the margin.
    tokens = llm.tokenize(text=prompt.encode("utf-8"), add_bos=True, special=True)
    if len(tokens) > 15000:
        return None

    output = llm(prompt, max_tokens=None, echo=False)
    result = output["choices"][0]
    source_code = result.get("text").strip()
    finish_reason = result.get("finish_reason")

    # Record the reported finish reason at the end of the generated file.
    source_code += f"\n\n# Finish Reason: {finish_reason}\n"

    # Write as UTF-8 so non-ASCII characters in the generated source cannot
    # fail on a platform with a narrower default encoding.
    with open(source_code_path, "w", encoding="utf-8") as source_code_file:
        source_code_file.write(source_code)

    return source_code


if __name__ == "__main__":
    # Command-line entry point: the single argument is either one bytecode
    # listing or a directory tree to scan for ``.pycb`` files.
    if len(sys.argv) < 2:
        # Fail with a usage message instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} <file.pycb | directory>")

    path = sys.argv[1]
    if os.path.isfile(path):
        output = decompile_pycb(path)
        print(output)
    elif os.path.isdir(path):
        for root, _dirs, files in os.walk(path):
            for file in files:
                if not file.endswith(".pycb"):
                    continue
                # Skip listings whose output file (the name minus its last two
                # characters) already exists, so reruns only do remaining work.
                if os.path.exists(os.path.join(root, file[:-2])):
                    continue
                file_path = os.path.join(root, file)
                output = decompile_pycb(file_path)
                if output:
                    print(f"Decompiled {file_path}")
    else:
        # Previously a bad path ended silently with exit status 0.
        sys.exit(f"error: {path!r} is not a file or directory")