Spaces: Running on Zero
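The example below is a minimal chat Space: it loads Llama 3.2 with transformers, wraps generation in a Gradio ChatInterface, and marks the generation function so ZeroGPU can attach a GPU on demand.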
import os

import gradio as gr
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the HF_TOKEN secret and log in so the gated Llama weights can be fetched
login(token=os.getenv("HF_TOKEN"))
# Load the tokenizer and model (device_map="auto" places weights on the GPU when one is attached)
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    device_map="auto",
    torch_dtype="auto",
)
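# Optional sketch: if a generation call needs more than the default ZeroGPU
# allocation window, a longer one can be requested in seconds, e.g.:
# @spaces.GPU(duration=120)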
# ZeroGPU only attaches a GPU while a @spaces.GPU-decorated function is running
@spaces.GPU
def generate_response(message, history):
    # Move the tokenized prompt to the same device as the model (GPU when allocated)
    inputs = tokenizer(message["text"], return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(**inputs, max_new_tokens=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
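# Sanity check outside the UI (hypothetical input dict, mirroring the message
# format ChatInterface passes when multimodal=True):
# print(generate_response({"text": "Hello", "files": []}, history=[]))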
demo = gr.ChatInterface(
    fn=generate_response,
    examples=[{"text": "Hello", "files": []}],
    title="Llama 3.2 Chat",
    multimodal=True,
)

demo.launch(debug=True)
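For the Space to build, extra dependencies go in a requirements.txt beside app.py. A minimal sketch, assuming these package names (gradio and the spaces client are typically preinstalled by the Gradio Space runtime; accelerate is what backs device_map="auto"):

accelerate
huggingface_hub
torch
transformers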