import streamlit as st
import torch
from transformers import AutoTokenizer
from peft import AutoPeftModelForCausalLM

# Load the model and tokenizer
model_name = "amiguel/itemClassification_Alpaca_Mistral"

# 4-bit quantization (bitsandbytes) requires a CUDA GPU; fall back to full precision on CPU
load_in_4bit = torch.cuda.is_available()
model = AutoPeftModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=load_in_4bit,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# device_map="auto" already places the model (a 4-bit model cannot be moved with .to()),
# so keep a handle to its device for the inputs
device = model.device

# Create a Streamlit app
st.title("Mistral Text Generation App")

# Create input fields for the prompt and generation settings
prompt = st.text_input("Enter the prompt:")
max_new_tokens = st.number_input("Enter the maximum number of new tokens to generate:", value=100)
num_beams = st.number_input("Enter the number of beams:", value=4)

# Create an output field
output_field = st.empty()

# Define the inference function
def infer():
    # Tokenize the prompt
    inputs = tokenizer([prompt], return_tensors="pt", truncation=True)

    # Move the inputs to the model's device
    inputs = {k: v.to(device) for k, v in inputs.items()}

    # Run inference; max_new_tokens counts only generated tokens, not the prompt,
    # so a long prompt cannot silently leave no room for generation
    outputs = model.generate(**inputs, max_new_tokens=int(max_new_tokens), num_beams=int(num_beams))

    # Convert the output to a string
    output_str = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Display the output
    output_field.text(output_str)

# Create a button to trigger the inference
infer_button = st.button("Generate Text")

# Run the inference function when the button is clicked
if infer_button:
    infer()
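# Usage note (assuming this script is saved as app.py):
#   streamlit run app.py
# Streamlit reruns the whole script on every widget interaction, so for a large
# model consider wrapping the loading step in a function decorated with
# @st.cache_resource to avoid reloading it on each rerun.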