Spaces:
Runtime error
import os
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Load the model and tokenizer from Hugging Face
model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"  # Adjust if necessary
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a pipeline for text generation
client = pipeline("text-generation", model=model, tokenizer=tokenizer)

# System prompt prepended to every conversation
system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers."
}
# Chat function: builds a plain-text prompt from the history and yields the reply
async def chat_groq(message, history):
    messages = [system_prompt]

    # Add conversation history to messages
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": str(user_msg)})
        messages.append({"role": "assistant", "content": str(assistant_msg)})

    # Add the new user message
    messages.append({"role": "user", "content": str(message)})

    # Format the conversation history as a single prompt string for the model
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Generate a response; return_full_text=False keeps the prompt out of the output,
    # and max_new_tokens bounds only the generated tokens rather than prompt + output
    response_content = client(
        conversation,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]

    yield response_content
# Gradio interface
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()
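
For debugging outside the Space, a minimal sketch of driving chat_groq directly, assuming the model and pipeline above load successfully; the prompt text and the debug_chat helper name are just illustrative. Since chat_groq is an async generator, it is consumed with async for:

import asyncio

async def debug_chat():
    # Empty history simulates the first turn of a new conversation
    history = []
    async for reply in chat_groq("Write a function that reverses a string.", history):
        print(reply)

asyncio.run(debug_chat())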