abalakrishnaTRI
committed on
Commit
•
6ba6dce
1
Parent(s):
bb834c6
support fused backbones and update MODEL_ID_TO_NAME
Browse files
- interactive_demo.py +7 -1
- serve/__init__.py +13 -13
interactive_demo.py
CHANGED
@@ -152,7 +152,13 @@ class ModelWorker:
|
|
152 |
# Assume `image_transform` is a HF ImageProcessor...
|
153 |
pixel_values = self.image_processor(images[0].convert("RGB"), return_tensors="pt")["pixel_values"][0]
|
154 |
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
generated_text = generated_text.split("USER")[0].split("ASSISTANT")[0]
|
157 |
yield json.dumps({"text": ori_prompt + generated_text, "error_code": 0}).encode() + b"\0"
|
158 |
|
|
|
152 |
# Assume `image_transform` is a HF ImageProcessor...
|
153 |
pixel_values = self.image_processor(images[0].convert("RGB"), return_tensors="pt")["pixel_values"][0]
|
154 |
|
155 |
+
if type(pixel_values) is dict:
|
156 |
+
for k in pixel_values.keys():
|
157 |
+
pixel_values[k] = torch.unsqueeze(pixel_values[k].cuda(), 0)
|
158 |
+
else:
|
159 |
+
pixel_values = torch.unsqueeze(pixel_values.cuda(), 0)
|
160 |
+
|
161 |
+
generated_text = self.vlm.generate_answer(pixel_values, question_prompt)[0]
|
162 |
generated_text = generated_text.split("USER")[0].split("ASSISTANT")[0]
|
163 |
yield json.dumps({"text": ori_prompt + generated_text, "error_code": 0}).encode() + b"\0"
|
164 |
|
serve/__init__.py
CHANGED
@@ -5,31 +5,31 @@ from collections import OrderedDict
|
|
5 |
MODEL_ID_TO_NAME = OrderedDict(
|
6 |
[
|
7 |
(
|
8 |
-
"llava-lvis4v-lrv+lvis4v-lrv-resize-naive-
|
9 |
-
"
|
10 |
),
|
11 |
(
|
12 |
-
"llava-lvis4v-lrv+lvis4v-lrv-resize-naive-
|
13 |
-
"
|
14 |
),
|
15 |
(
|
16 |
-
"resize-naive-
|
17 |
-
"
|
18 |
),
|
19 |
(
|
20 |
-
"resize-naive-
|
21 |
-
"
|
22 |
),
|
23 |
(
|
24 |
-
"resize-naive-
|
25 |
-
"
|
26 |
),
|
27 |
(
|
28 |
-
"resize-naive-
|
29 |
-
"
|
30 |
),
|
31 |
-
("llava-v1.5-7b", "LLaVA 1.5: 7B"),
|
32 |
("llava-v1.5-13b", "LLaVA 1.5: 13B"),
|
|
|
33 |
]
|
34 |
)
|
35 |
|
|
|
5 |
MODEL_ID_TO_NAME = OrderedDict(
|
6 |
[
|
7 |
(
|
8 |
+
"llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+13b+stage-finetune+x7",
|
9 |
+
"PrismaticVLM 13B - Chat",
|
10 |
),
|
11 |
(
|
12 |
+
"llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align+7b+stage-finetune+x7",
|
13 |
+
"PrismaticVLM 7B - Chat",
|
14 |
),
|
15 |
(
|
16 |
+
"llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+13b+stage-finetune+x7",
|
17 |
+
"PrismaticVLM 13B",
|
18 |
),
|
19 |
(
|
20 |
+
"llava-lvis4v-lrv+redux-lvis4v-lrv-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+7b+stage-finetune+x7",
|
21 |
+
"PrismaticVLM 7B",
|
22 |
),
|
23 |
(
|
24 |
+
"redux-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+13b+stage-finetune+x7",
|
25 |
+
"PrismaticVLM 13B (Controlled)",
|
26 |
),
|
27 |
(
|
28 |
+
"redux-resize-naive-dinosiglip-vit-so-14-384px-no-align-llama2pure+7b+stage-finetune+x7",
|
29 |
+
"PrismaticVLM 7B (Controlled)",
|
30 |
),
|
|
|
31 |
("llava-v1.5-13b", "LLaVA 1.5: 13B"),
|
32 |
+
("llava-v1.5-7b", "LLaVA 1.5: 7B"),
|
33 |
]
|
34 |
)
|
35 |
|