from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

# Load the model and tokenizer from Hugging Face
model_name = "Hastika/codellama-CodeLlama-34b-Instruct-hf"  # Adjust if necessary
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Note: a 34B model needs substantial GPU memory; if accelerate is installed,
# passing device_map="auto" and a half-precision torch_dtype can help.
model = AutoModelForCausalLM.from_pretrained(model_name)

# Create a pipeline for text generation
client = pipeline("text-generation", model=model, tokenizer=tokenizer)

# System prompt
system_prompt = {
    "role": "system",
    "content": "You are a useful assistant. You reply with efficient answers."
}

# Chat function
async def chat_groq(message, history):
    messages = [system_prompt]

    # Add the conversation history (list of [user, assistant] pairs)
    for msg in history:
        messages.append({"role": "user", "content": str(msg[0])})
        messages.append({"role": "assistant", "content": str(msg[1])})

    # Add the new user message
    messages.append({"role": "user", "content": str(message)})

    # Format the conversation history as a single string for the model
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)

    # Generate a response. return_full_text=False strips the prompt so only the
    # newly generated text is returned, and max_new_tokens bounds the completion
    # length independently of the prompt length.
    response_content = client(
        conversation,
        max_new_tokens=1024,
        do_sample=True,
        return_full_text=False,
    )[0]["generated_text"]
    yield response_content

# Gradio interface (button kwargs follow the Gradio 4.x ChatInterface API)
with gr.Blocks(theme=gr.themes.Monochrome(), fill_height=True) as demo:
    gr.ChatInterface(
        chat_groq,
        clear_btn=None,
        undo_btn=None,
        retry_btn=None,
    )

demo.queue()
demo.launch()