Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -24,8 +24,8 @@ overwrite_config = {}
 overwrite_config["mm_resampler_type"] = "dynamic_compressor"
 overwrite_config["patchify_video_feature"] = False
 overwrite_config["attn_implementation"] = "sdpa" if torch.__version__ >= "2.1.2" else "eager"
-tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name, device_map="
-model.to(
+tokenizer, model, image_processor, context_len = load_pretrained_model(model_path, None, model_name, device_map="cpu", overwrite_config=overwrite_config)
+model.to("cuda").eval()
 
 def preprocess_qwen(sources, tokenizer: transformers.PreTrainedTokenizer, has_image: bool = False, max_len=2048, system_message: str = "You are a helpful assistant.") -> Dict:
     roles = {"human": "<|im_start|>user", "gpt": "<|im_start|>assistant"}
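Two notes on this hunk. Loading with device_map="cpu" and only moving to CUDA afterwards is the usual pattern for ZeroGPU Spaces, where weights are materialized on CPU at startup and a GPU is attached per request. Separately, the attn_implementation guard compares version strings lexicographically, which misorders some releases ("2.1.10" sorts before "2.1.2" as a string); a more robust guard, sketched with the packaging library:

    from packaging import version
    import torch

    # Compare parsed versions instead of raw strings; parse() also
    # handles local suffixes such as "2.1.2+cu121".
    use_sdpa = version.parse(torch.__version__) >= version.parse("2.1.2")
    overwrite_config["attn_implementation"] = "sdpa" if use_sdpa else "eager"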
@@ -100,7 +100,7 @@ def oryx_inference(video, text):
     conv.append_message(conv.roles[1], None)
     prompt = conv.get_prompt()
 
-    input_ids = preprocess_qwen([{'from': 'human','value': question},{'from': 'gpt','value': None}], tokenizer, has_image=True).cuda
+    input_ids = preprocess_qwen([{'from': 'human','value': question},{'from': 'gpt','value': None}], tokenizer, has_image=True).to("cuda")
 
     video_processed = []
     for idx, frame in enumerate(video):
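The preprocess_qwen call tokenizes the conversation into the ChatML layout implied by the roles mapping in the hunk above; a rough sketch of that layout (simplified: the real function also splices image/video tokens and returns a tensor of input ids):

    def chatml_prompt(question, system_message="You are a helpful assistant."):
        # ChatML framing used by Qwen-style chat models
        return (f"<|im_start|>system\n{system_message}<|im_end|>\n"
                f"<|im_start|>user\n{question}<|im_end|>\n"
                f"<|im_start|>assistant\n")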
@@ -116,7 +116,7 @@ def oryx_inference(video, text):
     if frame_idx is None:
         frame_idx = np.arange(0, len(video_processed), dtype=int).tolist()
 
-    video_processed = torch.cat(video_processed, dim=0).bfloat16().cuda
+    video_processed = torch.cat(video_processed, dim=0).bfloat16().to("cuda")
     video_processed = (video_processed, video_processed)
 
     video_data = (video_processed, (384, 384), "video")
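The surrounding loop collects one preprocessed tensor per frame, so the torch.cat yields a single clip tensor that is cast and moved to the GPU in one step. A sketch of the shape flow, with the image_processor call assumed to follow the usual Hugging Face processor API:

    frames = []
    for frame in video:  # each frame: H x W x 3 uint8 array
        pixel = image_processor(frame, return_tensors="pt")["pixel_values"]  # (1, C, H, W)
        frames.append(pixel)
    clip = torch.cat(frames, dim=0).bfloat16().to("cuda")  # (num_frames, C, H, W)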
@@ -154,4 +154,4 @@ demo = gr.Interface(
 )
 
 # Launch the Gradio app
-demo.launch(
+demo.launch()
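On Spaces a bare demo.launch() is all that is needed, since the platform supplies the serving configuration; explicit arguments matter mainly for local runs (illustrative values only):

    # Local debugging: bind to all interfaces on Gradio's default port.
    demo.launch(server_name="0.0.0.0", server_port=7860)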