Update app.py
app.py CHANGED
@@ -11,11 +11,11 @@ import os
 token=os.environ.get("key_")
 ckpt = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 model = MllamaForConditionalGeneration.from_pretrained(ckpt,token=token,
-                                                       torch_dtype=torch.bfloat16)
+                                                       torch_dtype=torch.bfloat16)#.to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
 
 
-
+#@spaces.GPU
 def bot_streaming(message, history, max_new_tokens=250):
 
     txt = message["text"]
@@ -53,9 +53,9 @@ def bot_streaming(message, history, max_new_tokens=250):
     texts = processor.apply_chat_template(messages, add_generation_prompt=True)
 
     if images == []:
-        inputs = processor(text=texts, return_tensors="pt")
+        inputs = processor(text=texts, return_tensors="pt")#.to("cuda")
     else:
-        inputs = processor(text=texts, images=images, return_tensors="pt")
+        inputs = processor(text=texts, images=images, return_tensors="pt")#.to("cuda")
     streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
 
     generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)
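For context on what these hunks leave behind: with `.to("cuda")` commented out and the `@spaces.GPU` decorator disabled, the model and inputs stay on CPU, and `generation_kwargs` is handed to the usual `TextIteratorStreamer` flow. A minimal sketch of how the rest of `bot_streaming` would consume the streamer, assuming the standard background-thread idiom (the `Thread` launch and the `buffer`/`yield` loop are assumptions, not part of this diff):

from threading import Thread

# Run generate in a background thread so decoded text can be read
# from the streamer as tokens arrive (standard TextIteratorStreamer usage);
# model, generation_kwargs, and streamer come from the code above.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

buffer = ""
for new_text in streamer:  # streamer yields decoded text chunks
    buffer += new_text
    yield buffer           # Gradio renders the growing partial reply

Restoring the commented-out `.to("cuda")` calls (and, on a Hugging Face ZeroGPU Space, the `@spaces.GPU` decorator) moves the same code back onto the GPU; the streaming loop itself is unchanged.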