import time

import gradio as gr
from transformers import pipeline, AutoTokenizer

# Define examples and model configuration
examples = [
    "Give me a recipe for pizza with pineapple",
    "Write me a tweet about the new OpenVINO release",
    "Explain the difference between CPU and GPU",
    "Give five ideas for a great weekend with family",
    "Do Androids dream of Electric sheep?",
    "Who is Dolly?",
    "Please give me advice on how to write resume?",
    "Name 3 advantages to being a cat",
    "Write instructions on how to become a good AI engineer",
    "Write a love letter to my best friend",
]

# Define the model and its tokenizer
model_name = "susnato/phi-2"  # Replace with your actual model identifier
tokenizer = AutoTokenizer.from_pretrained(model_name)
generator = pipeline("text-generation", model=model_name, tokenizer=tokenizer)

def run_generation(user_text, top_p, temperature, top_k, max_new_tokens):
    # phi-2 expects the "Instruct: ... Output:" prompt format
    prompt = f"Instruct:{user_text}\nOutput:"
    start = time.perf_counter()
    # do_sample=True is needed for top_p/top_k/temperature to take effect;
    # return_full_text=False returns only the completion, without the prompt.
    response = generator(prompt, max_new_tokens=max_new_tokens, do_sample=True, top_p=top_p, temperature=temperature, top_k=top_k, return_full_text=False)[0]["generated_text"]
    return response, f"{time.perf_counter() - start:.2f} s"  # wall-clock latency as a simple performance metric

def reset_textbox(*args):
    # Clear the instruction, response, and performance textboxes
    return "", "", ""

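# Build the Gradio Blocks interface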
def main():
    with gr.Blocks() as demo:
        gr.Markdown(
            "# Question Answering with OpenVINO\n"
            "Provide instruction which describes a task below or select among predefined examples and model writes response that performs requested task."
        )

        with gr.Row():
            with gr.Column(scale=4):
                user_text = gr.Textbox(
                    placeholder="Write an email about an alpaca that likes flan",
                    label="User instruction",
                )
                model_output = gr.Textbox(label="Model response", interactive=False)
                performance = gr.Textbox(label="Performance", lines=1, interactive=False)
                with gr.Column(scale=1):
                    button_clear = gr.Button(value="Clear")
                    button_submit = gr.Button(value="Submit")
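                # Clicking an example below fills the instruction textbox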
                gr.Examples(examples, user_text)
            with gr.Column(scale=1):
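                # Generation and sampling parameter controls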
                max_new_tokens = gr.Slider(
                    minimum=1,
                    maximum=1000,
                    value=256,
                    step=1,
                    interactive=True,
                    label="Max New Tokens",
                )
                top_p = gr.Slider(
                    minimum=0.05,
                    maximum=1.0,
                    value=0.92,
                    step=0.05,
                    interactive=True,
                    label="Top-p (nucleus sampling)",
                )
                top_k = gr.Slider(
                    minimum=0,
                    maximum=50,
                    value=0,
                    step=1,
                    interactive=True,
                    label="Top-k",
                )
                temperature = gr.Slider(
                    minimum=0.1,
                    maximum=5.0,
                    value=0.8,
                    step=0.1,
                    interactive=True,
                    label="Temperature",
                )

        user_text.submit(
            run_generation,
            [user_text, top_p, temperature, top_k, max_new_tokens],
            [model_output, performance],
        )
        button_submit.click(
            run_generation,
            [user_text, top_p, temperature, top_k, max_new_tokens],
            [model_output, performance],
        )
        button_clear.click(
            reset_textbox,
            [user_text, model_output, performance],
            [user_text, model_output, performance],
        )

    return demo

if __name__ == "__main__":
    iface = main()
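    # share=True also serves the demo through a temporary public link in addition to the local URL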
    iface.launch(share=True)