import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Hugging Face Hub id shared by the tokenizer and the model weights.
MODEL_ID = "microsoft/phi-2"

# Both objects are built once at import time; generate() reads them as
# module-level globals.
# NOTE(review): trust_remote_code=True is forwarded to from_pretrained for
# both loads; confirm it is still required for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

# float32 weights, placed on the CPU via device_map.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    device_map="cpu",
    torch_dtype=torch.float32,
)


def generate(prompt, length):
    """Generate text from *prompt* using the module-level tokenizer and model.

    Args:
        prompt: Text that is tokenized and passed to ``model.generate``.
        length: Requested value for ``max_length`` (counted in tokens).
            May be an int or a float (it is truncated with ``int``);
            ``None`` is treated as "no request" and falls back to the
            minimum described below.

    Returns:
        The first decoded sequence returned by ``model.generate``.
    """
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)

    # len(inputs) counts the keys of the tokenizer output, not tokens, so
    # the prompt size has to be read from the last axis of input_ids.
    prompt_tokens = inputs["input_ids"].shape[-1]

    # UI number fields can deliver floats (or None when left empty).
    requested = 0 if length is None else int(length)

    # max_length is compared against the prompt length by generate(); keep
    # it strictly larger so at least one new token can be produced.
    max_length = max(requested, prompt_tokens + 1)

    outputs = model.generate(**inputs, max_length=max_length)
    return tokenizer.batch_decode(outputs)[0]


# Web UI: a prompt text box and a numeric "max length" field, wired to
# generate(); the result is rendered as plain text.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Text(
            label="prompt",
            value="Write a detailed analogy between mathematics and a lighthouse.",
        ),
        # precision=0 makes the field integer-valued so generate() receives
        # an int rather than a float; minimum=1 rejects zero/negative lengths.
        gr.Number(
            value=50,
            label="max length",
            minimum=1,
            maximum=200,
            precision=0,
        ),
    ],
    outputs="text",
)


# Launch the Gradio app only when this file is executed as a script, so that
# importing the module does not start the web server.
if __name__ == "__main__":
    demo.launch(show_api=False)