Update app.py
app.py CHANGED
@@ -11,11 +11,11 @@ import os
 token=os.environ.get("key_")
 ckpt = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(ckpt,token=token,
-                                                       torch_dtype=torch.bfloat16)
+                                                       torch_dtype=torch.bfloat16)#.to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
 
 
-
+#@spaces.GPU
 def bot_streaming(message, history, max_new_tokens=250):
 
     txt = message["text"]
@@ -53,9 +53,9 @@ def bot_streaming(message, history, max_new_tokens=250):
     texts = processor.apply_chat_template(messages, add_generation_prompt=True)
 
     if images == []:
-        inputs = processor(text=texts, return_tensors="pt")
+        inputs = processor(text=texts, return_tensors="pt")#.to("cuda")
     else:
-        inputs = processor(text=texts, images=images, return_tensors="pt")
+        inputs = processor(text=texts, images=images, return_tensors="pt")#.to("cuda")
     streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
 
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
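For context on what these hunks leave behind: with `.to("cuda")` commented out and the `@spaces.GPU` decorator disabled, the model and inputs stay on CPU, and `generation_kwargs` is handed to the usual `TextIteratorStreamer` flow. A minimal sketch of how the rest of `bot_streaming` would consume the streamer, assuming the standard background-thread idiom (the `Thread` launch and the `buffer`/`yield` loop are assumptions, not part of this diff):

from threading import Thread

# Run generate in a background thread so decoded text can be read
# from the streamer as tokens arrive (standard TextIteratorStreamer usage);
# model, generation_kwargs, and streamer come from the code above.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

buffer = ""
for new_text in streamer:  # streamer yields decoded text chunks
    buffer += new_text
    yield buffer           # Gradio renders the growing partial reply

Restoring the commented-out `.to("cuda")` calls (and, on a Hugging Face ZeroGPU Space, the `@spaces.GPU` decorator) moves the same code back onto the GPU; the streaming loop itself is unchanged.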