import gradio as gr

# Cache for the loaded model interface. Loading is expensive, so it must
# happen once, not on every inference request.
_model = None


def _get_model():
    """Lazily load and cache the Llama 3.3 model interface.

    Returns:
        The callable interface returned by ``gr.load`` for the
        meta-llama/Llama-3.3-70B-Instruct model. Subsequent calls reuse
        the cached instance instead of reloading.
    """
    global _model
    if _model is None:
        _model = gr.load("models/meta-llama/Llama-3.3-70B-Instruct")
    return _model


def predict(input_text, temperature, max_length, top_p, top_k):
    """Run inference with user-selected sampling parameters.

    Args:
        input_text: Prompt text to send to the model.
        temperature: Sampling temperature (0-1).
        max_length: Maximum number of tokens to generate.
        top_p: Nucleus-sampling probability mass (0-1).
        top_k: Top-k sampling cutoff.

    Returns:
        The model's generated text for the given prompt and parameters.
    """
    # Reuse the cached model instead of reloading it on every call
    # (the original reloaded the model inside each request).
    model = _get_model()
    return model(
        input_text,
        temperature=temperature,
        max_length=max_length,
        top_p=top_p,
        top_k=top_k,
    )


# Gradio UI: prompt input, sampling-parameter sliders, output box, and a
# button wired to predict(). UI label strings are kept as in the original.
with gr.Blocks() as demo:
    gr.Markdown("# Modelo Llama 3.3 - Ajuste de Parâmetros")
    with gr.Row():
        input_text = gr.Textbox(
            label="Texto de entrada", placeholder="Digite seu texto aqui"
        )
    with gr.Row():
        temperature = gr.Slider(0, 1, value=0.7, label="Temperature")
        max_length = gr.Slider(
            1, 2048, value=512, step=1, label="Comprimento Máximo"
        )
        top_p = gr.Slider(0, 1, value=0.9, label="Top-p (nucleus sampling)")
        top_k = gr.Slider(1, 100, value=50, step=1, label="Top-k")
    with gr.Row():
        output = gr.Textbox(label="Saída do Modelo")
    with gr.Row():
        submit_btn = gr.Button("Executar")
    submit_btn.click(
        predict,
        inputs=[input_text, temperature, max_length, top_p, top_k],
        outputs=output,
    )

# Guard the launch so importing this module (e.g. for testing) does not
# start the web server — the original launched unconditionally at import.
if __name__ == "__main__":
    demo.launch()