Upload 3 files

Browse files

Files changed (3) hide show

librkllmrt.so +2 -2
multiprocess_inference.py +219 -0
qwen.rkllm +2 -2

librkllmrt.so CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f55d476a55680ebedc145c771d29be66334c60c0d9b9eafc8587a2bcf4fddb6
-size 6230968

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac71a21e0fa68df97ab8145a0beae1c561f31d391ea78c12be675b9d34edea85
+size 6226872

multiprocess_inference.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import os
+import time
+import signal
+from multiprocessing import Process, Queue, Event
+import cv2
+import numpy as np
+from rkllm_binding import *
+from rknnlite.api.rknn_lite import RKNNLite
+# 视觉编码器进程
+def vision_encoder_process(load_ready_queue, embedding_queue, img_path_queue, start_event):
+    VISION_ENCODER_PATH = "vision_transformer.rknn"
+    img_size = 448
+    # 初始化视觉编码器
+    vision_encoder = RKNNLite(verbose=False)
+    model_size = os.path.getsize(VISION_ENCODER_PATH)
+    print(f"Start loading vision encoder model (size: {model_size / 1024 / 1024:.2f} MB)")
+    start_time = time.time()
+    vision_encoder.load_rknn(VISION_ENCODER_PATH)
+    end_time = time.time()
+    print(f"Vision encoder loaded in {end_time - start_time:.2f} seconds")
+    vision_encoder.init_runtime()
+    # 通知主进程加载完成
+    load_ready_queue.put("vision_ready")
+    # 等待开始信号
+    start_event.wait()
+    def process_image(img_path, vision_encoder):
+        img = cv2.imread(img_path)
+        if img is None:
+            return None
+        print("Start vision inference...")
+        img = cv2.resize(img, (img_size, img_size))
+        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+        img = img.astype(np.float32)
+        img = img[np.newaxis, :, :, :]
+        start_time = time.time()
+        image_embeddings = vision_encoder.inference(inputs=[img], data_format="nhwc")[0].astype(np.float32)
+        end_time = time.time()
+        print(f"Vision encoder inference time: {end_time - start_time:.2f} seconds")
+        return image_embeddings
+    while True:
+        img_path = img_path_queue.get()
+        if img_path == "STOP":
+            break
+        embeddings = process_image(img_path, vision_encoder)
+        if embeddings is not None:
+            embedding_queue.put(embeddings)
+        else:
+            embedding_queue.put("ERROR")
+# LLM进程
+def llm_process(load_ready_queue, embedding_queue, prompt_queue, inference_done_queue, start_event):
+    MODEL_PATH = "/home/firefly/qwen.rkllm"
+    handle = None
+    def signal_handler(signal, frame):
+        print("Ctrl-C pressed, exiting...")
+        global handle
+        if handle:
+            abort(handle)
+            destroy(handle)
+        exit(0)
+    signal.signal(signal.SIGINT, signal_handler)
+    os.environ["RKLLM_LOG_LEVEL"] = "1"
+    inference_count = 0
+    inference_start_time = 0
+    def result_callback(result, userdata, state):
+        nonlocal inference_start_time, inference_count
+        if state == LLMCallState.RKLLM_RUN_NORMAL:
+            if inference_count == 0:
+                first_token_time = time.time()
+                print(f"Time to first token: {first_token_time - inference_start_time:.2f} seconds")
+            inference_count += 1
+            print(result.contents.text.decode(), end="", flush=True)
+        elif state == LLMCallState.RKLLM_RUN_FINISH:
+            print("\n\n(finished)")
+            inference_done_queue.put("DONE")
+        elif state == LLMCallState.RKLLM_RUN_ERROR:
+            print("\nError occurred during LLM call")
+            inference_done_queue.put("ERROR")
+    # 初始化LLM
+    param = create_default_param()
+    param.model_path = MODEL_PATH.encode()
+    param.img_start = "<image>".encode()
+    param.img_end = "</image>".encode()
+    param.img_content = "<unk>".encode()
+    extend_param = RKLLMExtendParam()
+    extend_param.base_domain_id = 1
+    param.extend_param = extend_param
+    model_size = os.path.getsize(MODEL_PATH)
+    print(f"Start loading language model (size: {model_size / 1024 / 1024:.2f} MB)")
+    start_time = time.time()
+    handle = init(param, result_callback)
+    end_time = time.time()
+    print(f"Language model loaded in {end_time - start_time:.2f} seconds")
+    # 通知主进程加载完成
+    load_ready_queue.put("llm_ready")
+    # 创建推理参数
+    infer_param = RKLLMInferParam()
+    infer_param.mode = RKLLMInferMode.RKLLM_INFER_GENERATE.value
+    while True:
+        prompt = prompt_queue.get()
+        # print(f"Received prompt: ====\n{prompt}\n====")
+        if prompt == "STOP":
+            break
+        image_embeddings = embedding_queue.get()
+        if isinstance(image_embeddings, str) and image_embeddings == "ERROR":
+            print("Error processing image")
+            continue
+        rkllm_input = create_rkllm_input(RKLLMInputType.RKLLM_INPUT_MULTIMODAL,
+                                        prompt=prompt,
+                                        image_embed=image_embeddings)
+        inference_start_time = time.time()
+        run(handle, rkllm_input, infer_param, None)
+    # 清理
+    destroy(handle)
+def main():
+    load_ready_queue = Queue()
+    embedding_queue = Queue()
+    img_path_queue = Queue()
+    prompt_queue = Queue()
+    inference_done_queue = Queue()
+    start_event = Event()
+    vision_process = Process(target=vision_encoder_process,
+                           args=(load_ready_queue, embedding_queue, img_path_queue, start_event))
+    lm_process = Process(target=llm_process,
+                        args=(load_ready_queue, embedding_queue, prompt_queue, inference_done_queue, start_event))
+    vision_process.start()
+    lm_process.start()
+    # 等待模型加载
+    ready_count = 0
+    while ready_count < 2:
+        status = load_ready_queue.get()
+        print(f"Received ready signal: {status}")
+        ready_count += 1
+    print("All models loaded, starting interactive mode...")
+    start_event.set()
+    # 交互循环
+    try:
+        while True:
+            print("""
+Enter your input (3 empty lines to start inference, Ctrl+C to exit, for example:
+详细描述一下{{./test.jpg}}这张图片
+What is the weather in {{./test.jpg}}?
+How many people are in {{./test.jpg}}?
+):
+""")
+            user_input = []
+            empty_lines = 0
+            while empty_lines < 3:
+                line = input()
+                if line.strip() == "":
+                    empty_lines += 1
+                else:
+                    empty_lines = 0
+                user_input.append(line)
+            # 解析输入
+            full_input = "\n".join(user_input[:-3])  # 去掉最后3个空行
+            import re
+            img_match = re.search(r'\{\{(.+?)\}\}', full_input)
+            if not img_match:
+                print("No image path found in input")
+                continue
+            img_path = img_match.group(1)
+            # 将图片标记替换为<image>标记
+            prompt = f"""<|im_start|>system
+You are a helpful assistant.<|im_end|>
+<|im_start|>user
+{full_input.replace(img_match.group(0), '<image>')}<|im_end|>
+<|im_start|>assistant
+"""
+            img_path_queue.put(img_path)
+            prompt_queue.put(prompt)
+            # 等待推理完成
+            status = inference_done_queue.get()
+            if status == "ERROR":
+                print("Inference failed")
+    except KeyboardInterrupt:
+        print("\nExiting...")
+        img_path_queue.put("STOP")
+        prompt_queue.put("STOP")
+    vision_process.join()
+    lm_process.join()
+# Script entry point: start the interactive demo only when this file is run
+# directly, not when it is imported as a module.
+if __name__ == "__main__":
+    main()

qwen.rkllm CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34b91108056dc595a3eb4c9f340217160974adf35d4399ac5187eae6f22bb6a0
-size 8681282052

 version https://git-lfs.github.com/spec/v1
+oid sha256:8f09aed671c9dc322c4c60b1c649ab200aa76c65e7be409b8f76516dbe433dc5
+size 8189403140