from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Load the model and tokenizer from Hugging Face
model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"  # Adjust if necessary
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Note: a 34B model needs substantial GPU memory; if accelerate is installed,
# passing device_map="auto" and a half-precision torch_dtype can help.
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a pipeline for text generation
client = pipeline("text-generation", model=model, tokenizer=tokenizer)

# System prompt
system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers."
}

# Chat function
async def chat_groq(message, history):
    messages = [system_prompt]

    # Add the conversation history (list of [user, assistant] pairs)
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})

    # Add the new user message
    messages.append({"role": "user", "content": str(message)})

    # Format the conversation history as a single string for the model
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Generate a response. return_full_text=False strips the prompt so only the
    # newly generated text is returned, and max_new_tokens bounds the completion
    # length independently of the prompt length.
    response_content = client(
        conversation,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]
    yield response_content

# Gradio interface (button kwargs follow the Gradio 4.x ChatInterface API)
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()