import streamlit as st
from transformers import pipeline
from huggingface_hub import login
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file
load_dotenv()

# Read the Hugging Face access token from the hf_token environment variable
token = os.getenv('hf_token')

# Log in using the token
login(token)

# Initialize the text generation pipeline with optimizations
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    device=-1,      # Ensure it runs on CPU
    use_fast=True,  # Use the fast tokenizer
)

# Streamlit app
st.title("Llama3.2-1B")

# Text input from the user
user_input = st.text_input("Enter your message:", "Delete this and write your query.")

# Generate text when the button is clicked
if st.button("Generate"):
    # Chat-style input requires the model's tokenizer to define a chat
    # template; the -Instruct variant of the model is the usual choice here
    messages = [{"role": "user", "content": user_input}]

    # Keep max_new_tokens small for faster generation; adjust as needed
    output = pipe(messages, max_new_tokens=150)

    # With chat input the pipeline returns the whole conversation, so take
    # the content of the last (assistant) message
    generated_text = output[0]['generated_text'][-1]['content']

    # Display the generated text
    st.write("Generated Response:")
    st.write(generated_text)
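
# Usage sketch (assumptions: the script is saved as app.py and the .env file
# sits beside it; both names are illustrative, not fixed by this script):
#
#   # .env
#   hf_token=hf_xxxxxxxxxxxxxxxxxxxx   # your actual Hugging Face token
#
#   $ pip install streamlit transformers huggingface_hub python-dotenv torch
#   $ streamlit run app.py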