import streamlit as st
from transformers import pipeline
from huggingface_hub import login
import os
from dotenv import load_dotenv

# Load environment variables from a local .env file
load_dotenv()

# Read the Hugging Face access token from the hf_token environment variable
token = os.getenv('hf_token')

# Log in using the token
login(token)

# Initialize the text generation pipeline with optimizations
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    device=-1,      # Ensure it runs on CPU
    use_fast=True,  # Use the fast tokenizer
)

# Streamlit app
st.title("Llama3.2-1B")

# Text input from the user
user_input = st.text_input("Enter your message:", "Delete this and write your query.")

# Generate text when the button is clicked
if st.button("Generate"):
    # Chat-style input requires the model's tokenizer to define a chat
    # template; the -Instruct variant of the model is the usual choice here
    messages = [{"role": "user", "content": user_input}]

    # Keep max_new_tokens small for faster generation; adjust as needed
    output = pipe(messages, max_new_tokens=150)

    # With chat input the pipeline returns the whole conversation, so take
    # the content of the last (assistant) message
    generated_text = output[0]['generated_text'][-1]['content']

    # Display the generated text
    st.write("Generated Response:")
    st.write(generated_text)
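
# Usage sketch (assumptions: the script is saved as app.py and the .env file
# sits beside it; both names are illustrative, not fixed by this script):
#
#   # .env
#   hf_token=hf_xxxxxxxxxxxxxxxxxxxx   # your actual Hugging Face token
#
#   $ pip install streamlit transformers huggingface_hub python-dotenv torch
#   $ streamlit run app.py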