happyme531 committed on
Commit
194663d
·
verified ·
1 Parent(s): 7bda55c

Upload 3 files

Browse files
Files changed (3) hide show
  1. librkllmrt.so +2 -2
  2. multiprocess_inference.py +219 -0
  3. qwen.rkllm +2 -2
librkllmrt.so CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f55d476a55680ebedc145c771d29be66334c60c0d9b9eafc8587a2bcf4fddb6
3
- size 6230968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac71a21e0fa68df97ab8145a0beae1c561f31d391ea78c12be675b9d34edea85
3
+ size 6226872
multiprocess_inference.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import signal
4
+ from multiprocessing import Process, Queue, Event
5
+ import cv2
6
+ import numpy as np
7
+ from rkllm_binding import *
8
+ from rknnlite.api.rknn_lite import RKNNLite
9
+
10
+ # 视觉编码器进程
11
+ def vision_encoder_process(load_ready_queue, embedding_queue, img_path_queue, start_event):
12
+
13
+ VISION_ENCODER_PATH = "vision_transformer.rknn"
14
+ img_size = 448
15
+
16
+ # 初始化视觉编码器
17
+ vision_encoder = RKNNLite(verbose=False)
18
+ model_size = os.path.getsize(VISION_ENCODER_PATH)
19
+ print(f"Start loading vision encoder model (size: {model_size / 1024 / 1024:.2f} MB)")
20
+ start_time = time.time()
21
+ vision_encoder.load_rknn(VISION_ENCODER_PATH)
22
+ end_time = time.time()
23
+ print(f"Vision encoder loaded in {end_time - start_time:.2f} seconds")
24
+ vision_encoder.init_runtime()
25
+
26
+ # 通知主进程加载完成
27
+ load_ready_queue.put("vision_ready")
28
+
29
+ # 等待开始信号
30
+ start_event.wait()
31
+
32
+ def process_image(img_path, vision_encoder):
33
+ img = cv2.imread(img_path)
34
+ if img is None:
35
+ return None
36
+ print("Start vision inference...")
37
+ img = cv2.resize(img, (img_size, img_size))
38
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
39
+ img = img.astype(np.float32)
40
+ img = img[np.newaxis, :, :, :]
41
+
42
+ start_time = time.time()
43
+ image_embeddings = vision_encoder.inference(inputs=[img], data_format="nhwc")[0].astype(np.float32)
44
+ end_time = time.time()
45
+ print(f"Vision encoder inference time: {end_time - start_time:.2f} seconds")
46
+ return image_embeddings
47
+
48
+ while True:
49
+ img_path = img_path_queue.get()
50
+ if img_path == "STOP":
51
+ break
52
+ embeddings = process_image(img_path, vision_encoder)
53
+ if embeddings is not None:
54
+ embedding_queue.put(embeddings)
55
+ else:
56
+ embedding_queue.put("ERROR")
57
+
58
# LLM worker process.
def llm_process(load_ready_queue, embedding_queue, prompt_queue, inference_done_queue, start_event):
    """Load the RKLLM language model and run multimodal generation.

    Protocol with the main process:
      * puts "llm_ready" on load_ready_queue once the model is loaded
      * for each prompt from prompt_queue, pairs it with the matching
        image embedding from embedding_queue and streams generated
        tokens to stdout via the result callback
      * puts "DONE" or "ERROR" on inference_done_queue after each run
      * the sentinel prompt "STOP" terminates the loop
    """
    MODEL_PATH = "/home/firefly/qwen.rkllm"
    handle = None

    def signal_handler(signum, frame):
        # Fix: the original declared `global handle`, but `handle` is a
        # local of llm_process, so the global lookup would raise
        # NameError on Ctrl-C. Reading through the closure is all that
        # is needed here (no rebinding occurs).
        print("Ctrl-C pressed, exiting...")
        if handle:
            abort(handle)
            destroy(handle)
        exit(0)

    signal.signal(signal.SIGINT, signal_handler)
    os.environ["RKLLM_LOG_LEVEL"] = "1"

    inference_count = 0        # tokens emitted in the current run
    inference_start_time = 0   # wall-clock start of the current run

    def result_callback(result, userdata, state):
        """Streaming callback from librkllmrt: print tokens, report completion."""
        nonlocal inference_start_time, inference_count
        if state == LLMCallState.RKLLM_RUN_NORMAL:
            if inference_count == 0:
                # First token of this run: report latency.
                print(f"Time to first token: {time.time() - inference_start_time:.2f} seconds")
            inference_count += 1
            print(result.contents.text.decode(), end="", flush=True)
        elif state == LLMCallState.RKLLM_RUN_FINISH:
            print("\n\n(finished)")
            inference_done_queue.put("DONE")
        elif state == LLMCallState.RKLLM_RUN_ERROR:
            print("\nError occurred during LLM call")
            inference_done_queue.put("ERROR")

    # Initialize the LLM with multimodal (image placeholder) markers.
    param = create_default_param()
    param.model_path = MODEL_PATH.encode()
    param.img_start = "<image>".encode()
    param.img_end = "</image>".encode()
    param.img_content = "<unk>".encode()
    extend_param = RKLLMExtendParam()
    extend_param.base_domain_id = 1
    param.extend_param = extend_param

    model_size = os.path.getsize(MODEL_PATH)
    print(f"Start loading language model (size: {model_size / 1024 / 1024:.2f} MB)")
    start_time = time.time()
    handle = init(param, result_callback)
    print(f"Language model loaded in {time.time() - start_time:.2f} seconds")

    # Notify the main process that loading is done.
    load_ready_queue.put("llm_ready")

    # Inference parameters: plain autoregressive generation.
    infer_param = RKLLMInferParam()
    infer_param.mode = RKLLMInferMode.RKLLM_INFER_GENERATE.value

    while True:
        prompt = prompt_queue.get()
        if prompt == "STOP":
            break

        image_embeddings = embedding_queue.get()
        if isinstance(image_embeddings, str) and image_embeddings == "ERROR":
            print("Error processing image")
            continue

        rkllm_input = create_rkllm_input(RKLLMInputType.RKLLM_INPUT_MULTIMODAL,
                                         prompt=prompt,
                                         image_embed=image_embeddings)

        # Fix: reset the token counter so "Time to first token" is
        # reported for every prompt, not only the first one.
        inference_count = 0
        inference_start_time = time.time()
        run(handle, rkllm_input, infer_param, None)

    # Cleanup.
    destroy(handle)
138
def main():
    """Spawn the vision-encoder and LLM worker processes and run an
    interactive prompt loop.

    Input format: free text containing an image path wrapped in
    ``{{...}}``; three consecutive empty lines start the inference.
    """
    # Fix: hoist `import re` out of the interactive loop (it was
    # re-executed on every iteration).
    import re

    load_ready_queue = Queue()
    embedding_queue = Queue()
    img_path_queue = Queue()
    prompt_queue = Queue()
    inference_done_queue = Queue()
    start_event = Event()

    vision_process = Process(target=vision_encoder_process,
                             args=(load_ready_queue, embedding_queue, img_path_queue, start_event))
    lm_process = Process(target=llm_process,
                         args=(load_ready_queue, embedding_queue, prompt_queue, inference_done_queue, start_event))

    vision_process.start()
    lm_process.start()

    # Wait until both workers report their models are loaded.
    ready_count = 0
    while ready_count < 2:
        status = load_ready_queue.get()
        print(f"Received ready signal: {status}")
        ready_count += 1

    print("All models loaded, starting interactive mode...")
    start_event.set()

    # Interactive loop.
    try:
        while True:
            print("""
Enter your input (3 empty lines to start inference, Ctrl+C to exit, for example:
详细描述一下{{./test.jpg}}这张图片
What is the weather in {{./test.jpg}}?
How many people are in {{./test.jpg}}?
):
""")
            user_input = []
            empty_lines = 0

            while empty_lines < 3:
                line = input()
                if line.strip() == "":
                    empty_lines += 1
                else:
                    empty_lines = 0
                user_input.append(line)

            # Parse input: drop the 3 terminating empty lines.
            full_input = "\n".join(user_input[:-3])
            img_match = re.search(r'\{\{(.+?)\}\}', full_input)
            if not img_match:
                print("No image path found in input")
                continue

            img_path = img_match.group(1)
            # Replace the {{path}} marker with the <image> placeholder
            # expected by the LLM's multimodal prompt template.
            prompt = f"""<|im_start|>system
You are a helpful assistant.<|im_end|>
<|im_start|>user
{full_input.replace(img_match.group(0), '<image>')}<|im_end|>
<|im_start|>assistant

"""
            img_path_queue.put(img_path)
            prompt_queue.put(prompt)

            # Wait for this round of inference to finish.
            status = inference_done_queue.get()
            if status == "ERROR":
                print("Inference failed")

    except KeyboardInterrupt:
        print("\nExiting...")
        img_path_queue.put("STOP")
        prompt_queue.put("STOP")

    vision_process.join()
    lm_process.join()

if __name__ == "__main__":
    main()
qwen.rkllm CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34b91108056dc595a3eb4c9f340217160974adf35d4399ac5187eae6f22bb6a0
3
- size 8681282052
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f09aed671c9dc322c4c60b1c649ab200aa76c65e7be409b8f76516dbe433dc5
3
+ size 8189403140