import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the Inference Client for the GPT-2 model.
# Note: "gpt2" is a base language model with no chat template; if
# chat_completion requests fail, substitute a chat-tuned model id
# (e.g. "HuggingFaceH4/zephyr-7b-beta").
client = InferenceClient("gpt2")


def respond(message, history, max_tokens, temperature, top_p):
    history = history or []
    messages = []

    # Add the conversation history (user and assistant exchanges)
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current user message to continue the conversation
    messages.append({"role": "user", "content": message})

    response = ""

    # Stream the model's response token by token via chat completion
    for response_chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # delta.content can be None on some chunks (e.g. the final one)
        token = response_chunk.choices[0].delta.content or ""
        response += token
        yield response, history

    # Persist the finished exchange so follow-up turns see the full conversation
    yield response, history + [(message, response)]


# Create Gradio Blocks layout for Hugging Face Spaces
with gr.Blocks() as demo:
    with gr.Row():
        user_input = gr.Textbox(label="User Input")
        history = gr.State([])  # Keeps the conversation history across turns

    with gr.Row():
        max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
        temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
        top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

    with gr.Row():
        output = gr.Textbox(label="Model Output")

    # Wire the textbox to the streaming respond() generator; history is both
    # an input and an output so each completed turn is appended to the state
    user_input.submit(
        respond,
        [user_input, history, max_tokens_slider, temperature_slider, top_p_slider],
        [output, history],
    )

if __name__ == "__main__":
    demo.launch()
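
# Usage: locally, run `python app.py` and open the printed URL in a browser.
# On Hugging Face Spaces, the file is conventionally named app.py and the
# demo launches automatically when the Space builds.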