# Spaces status: Runtime error
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
# Hugging Face Hub id of the checkpoint served by this app.
model_id = "deepseek-ai/DeepSeek-V3"

# Tokenizer and model are loaded once at import time and shared by every
# call to predict().
tokenizer = AutoTokenizer.from_pretrained(model_id)

# device_map="auto" places the weights on a GPU when one is available;
# trust_remote_code=True permits the custom modelling code shipped in the
# model repository to be executed during loading.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    trust_remote_code=True,
)
def predict(message, history):
    """Generate the assistant's reply to ``message`` given the chat ``history``.

    Args:
        message: The latest user message text.
        history: Previous turns of the chat. Each item may be either a
            ``(user_msg, bot_response)`` pair or a
            ``{"role": ..., "content": ...}`` dict. ``None`` or an empty
            sequence means there is no prior context.

    Returns:
        The newly generated assistant text with surrounding whitespace
        stripped. The prompt itself is not included in the result.
    """
    conversation = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content dicts are already in chat-template form.
            conversation.append(
                {"role": turn["role"], "content": turn["content"]}
            )
            continue
        user_msg, bot_response = turn
        conversation.append({"role": "user", "content": user_msg})
        if bot_response:  # Only add the bot response if it exists.
            conversation.append({"role": "assistant", "content": bot_response})
    conversation.append({"role": "user", "content": message})

    # return_dict=True yields a mapping (input_ids, attention_mask) so it can
    # be unpacked into generate(); add_generation_prompt=True appends the
    # assistant header so the model answers instead of continuing the user.
    inputs = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)  # Keep the inputs on the same device as the model.

    outputs = model.generate(**inputs, max_new_tokens=512)  # Adjust as needed.

    # generate() returns prompt + completion; decode only the new tokens so
    # the reply does not echo the whole conversation back to the user.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # Basic cleanup (you might need more sophisticated cleaning).
    return response.replace("<|assistant|>", "").strip()
# Chat UI wired to predict(). gr.ChatInterface takes its input widget via
# `textbox=` and its transcript widget via `chatbot=`; it has no
# `inputs=`/`outputs=` parameters, and Gradio has no `Chatbox` component.
iface = gr.ChatInterface(
    fn=predict,
    textbox=gr.Textbox(placeholder="Type a message..."),
    chatbot=gr.Chatbot(),
    title="DeepSeek-V3 Chatbot",
    description="Chat with the DeepSeek-V3 model.",
)

# Start the web server for the app.
iface.launch()