"""Gradio demo that generates text with the LLaMA 3.2 1B model."""

import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# Fetch API token from environment variable
api_token = os.getenv("Llama_Token")

# Authenticate with Hugging Face
# NOTE(review): if "Llama_Token" is unset this passes None to login();
# confirm that is acceptable in a non-interactive deployment.
login(api_token)

# Load LLaMA 3.2 model and tokenizer with the API token
model_name = "meta-llama/Llama-3.2-1B"
tokenizer = AutoTokenizer.from_pretrained(model_name, token=api_token)
model = AutoModelForCausalLM.from_pretrained(model_name, token=api_token)


# Define the function to generate text
def generate_text(prompt, max_length=100, temperature=0.7):
    """Generate a continuation of ``prompt`` with the loaded model.

    Args:
        prompt: Text to continue.
        max_length: Upper bound on the total sequence length in tokens,
            counting the prompt tokens as well as the generated ones.
        temperature: Sampling temperature. Values greater than zero enable
            sampling; zero or less falls back to greedy decoding.

    Returns:
        The decoded sequence (prompt followed by the generated text) with
        special tokens removed.

    Raises:
        gr.Error: If the tokenized prompt already reaches ``max_length``,
            leaving no room to generate anything.
    """
    # Sliders may deliver floats; generate() needs an integer length.
    max_length = int(max_length)
    temperature = float(temperature)

    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_tokens = inputs["input_ids"].shape[1]
    if prompt_tokens >= max_length:
        # max_length includes the prompt, so nothing could be generated.
        raise gr.Error(
            f"The prompt is {prompt_tokens} tokens long, which leaves no room "
            f"within Max Length ({max_length}). Shorten the prompt or "
            "increase Max Length."
        )

    # Temperature is only honoured when sampling is enabled; without
    # do_sample=True the model decodes greedily and ignores it.
    if temperature > 0:
        decoding_options = {"do_sample": True, "temperature": temperature}
    else:
        decoding_options = {"do_sample": False}

    output = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly rather than letting generate() infer it.
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id,
        **decoding_options,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)


# Create the Gradio interface
iface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Enter your prompt", placeholder="Start typing...", lines=5),
        gr.Slider(minimum=50, maximum=200, value=100, step=1, label="Max Length"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature"),
    ],
    outputs="text",
    title="LLaMA 3.2 Text Generator",
    description="Generate text using the LLaMA 3.2 model. Adjust the settings and input a prompt to generate responses.",
)

# Launch the Gradio app
iface.launch(share=True)