import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import spaces

# Hugging Face Hub identifier of the checkpoint served by this app.
model_name = "MBZUAI-Paris/Atlas-Chat-27B"

# Load the weights quantized to 4 bits and let `device_map="auto"` decide
# where the layers are placed. The quantization settings are passed through
# an explicit BitsAndBytesConfig because handing `load_in_4bit` straight to
# `from_pretrained` is the deprecated spelling of the same request.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


@spaces.GPU
def chat(input_text, history=None):
    """Generate a reply to *input_text* and append the turn to the history.

    Args:
        input_text: The message typed by the user.
        history: List of ``(user_message, reply)`` tuples from earlier turns,
            or ``None`` when the conversation has not started yet.

    Returns:
        A ``(history, history)`` pair: the same updated list twice, once for
        the chatbot display and once for the state carried to the next call.
    """
    # Work on a fresh list: a mutable default would be shared by every call,
    # and the state handed in by the UI is None on the first turn.
    history = list(history) if history else []

    # Nothing to answer for a missing or blank message; leave the
    # conversation untouched instead of generating from an empty prompt.
    if not input_text or not input_text.strip():
        return history, history

    # Tokenize the input and generate the response
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=150)

    # The generated sequence starts with the prompt tokens; drop them so the
    # reply does not repeat the user's own message.
    prompt_length = inputs["input_ids"].shape[-1]
    response = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    )

    # Update the conversation history
    history.append((input_text, response))
    return history, history


# Chat UI: a textbox plus hidden state go in, the rendered conversation plus
# the updated state come out. Components are taken from the top-level `gr`
# namespace; the legacy `gr.inputs` / `gr.outputs` modules are not available
# in current Gradio releases.
# `live=True` is intentionally not set: it would re-run the model on every
# change of the textbox instead of once per submitted message.
iface = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(label="أدخل رسالتك هنا"),
        "state",
    ],
    outputs=[
        gr.Chatbot(label="المحادثة"),
        "state",
    ],
    title="دردشة أطلس",
    description="تطبيق دردشة يعمل بنموذج أطلس-شات لتوفير تفاعل ذكي وسلس",
    theme="huggingface",
    examples=[
        ["مرحباً! كيف يمكنني مساعدتك اليوم؟"],
        ["ما هي أخبار التكنولوجيا الحديثة؟"],
    ],
)

# Launch the application
iface.launch()