import gradio as gr
from huggingface_hub import InferenceClient

# Models that should run efficiently on Hugging Face inference hardware.
# Note: only chat/instruct-tuned models (e.g. Zephyr-7B Beta) reliably support
# the chat_completion endpoint; base models such as GPT-2, BART, and T5 may
# reject chat-formatted requests.
MODEL_CHOICES = {
    "GPT-Neo 1.3B": "EleutherAI/gpt-neo-1.3B",         # Small, efficient, and creative
    "GPT-Neo 2.7B": "EleutherAI/gpt-neo-2.7B",         # A bit larger but manageable
    "GPT-J 6B": "EleutherAI/gpt-j-6B",                 # Larger but still reasonable
    "DialoGPT Medium": "microsoft/DialoGPT-medium",    # Fine-tuned for conversations
    "DialoGPT Large": "microsoft/DialoGPT-large",      # Fine-tuned for conversations
    "GPT-2": "gpt2",                                   # Lightweight and fast
    "GPT-2 Medium": "gpt2-medium",                     # Lightweight, slightly larger
    "Zephyr-7B Beta": "HuggingFaceH4/zephyr-7b-beta",  # Large, high-quality chat model
    "BART Base": "facebook/bart-base",                 # Text generation and summarization
    "T5 Small": "t5-small",                            # Fast and efficient
}


def get_model(model_name: str) -> InferenceClient:
    """Return an InferenceClient for the chosen model, falling back to the default."""
    return InferenceClient(MODEL_CHOICES.get(model_name, "EleutherAI/gpt-neo-1.3B"))


def filter_response(response: str) -> str:
    """Replace responses containing inappropriate keywords with a neutral message."""
    inappropriate_keywords = [
        "love you", "cute", "i love u", "sweetheart", "baby", "porn",
        "flirt", "kiss", "hug", "freaky", "relationship", "personal",
    ]
    lowered = response.lower()
    for word in inappropriate_keywords:
        if word in lowered:
            return "I'm here to help with your questions in a professional manner."
    return response


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name,
):
    # Re-create the client per request so the dropdown selection takes effect
    # without mutating shared global state across concurrent users.
    client = get_model(model_name)

    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # The final streamed chunk may carry no content
            response += token
        # Filter the streamed text without overwriting the accumulator, so a
        # triggered filter message isn't appended to by later tokens.
        yield filter_response(response)


# Dropdown for choosing the backing model
model_dropdown = gr.Dropdown(
    choices=list(MODEL_CHOICES.keys()),
    value="GPT-Neo 1.3B",  # Default model
    label="Choose Model",
)

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a helpful assistant and should remain professional and polite at all times.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        model_dropdown,  # Include model selection
    ],
)

if __name__ == "__main__":
    demo.launch()