Spaces:

RandomOnHuggingFace
/

AIiscool

Sleeping

App Files Files Community

AIiscool / app.py

RandomOnHuggingFace

Update app.py

d79fa5e verified about 2 months ago

raw

history blame contribute delete

1.83 kB

	import gradio as gr
	from huggingface_hub import InferenceClient

	# Shared Inference API client used by respond(); it targets the "gpt2" model
	# repo (nicknamed "gpttrash" by the original author).
	# NOTE(review): respond() calls chat_completion on this client -- confirm that
	# "gpt2" is actually served with chat-completion support.
	client = InferenceClient(model="gpt2")

	def respond(message, history, max_tokens, temperature, top_p):
	    """Stream the model's reply to ``message`` as a progressively longer string.

	    Args:
	        message: The new user message to send.
	        history: Earlier exchanges as a sequence of items where index 0 is
	            the user text and index 1 is the assistant text. ``None`` or an
	            empty sequence means there is no prior conversation.
	        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
	        temperature: Forwarded to ``client.chat_completion`` as ``temperature``.
	        top_p: Forwarded to ``client.chat_completion`` as ``top_p``.

	    Yields:
	        The response text accumulated so far, once per streamed chunk that
	        carries content.
	    """
	    messages = []

	    # Replay the conversation history (user and assistant exchanges).
	    # ``history`` may be None (e.g. an unseeded gr.State), so fall back to
	    # an empty list instead of failing on iteration.
	    for val in history or []:
	        if val[0]:
	            messages.append({"role": "user", "content": val[0]})
	        if val[1]:
	            messages.append({"role": "assistant", "content": val[1]})

	    # Add the current user message to continue the conversation
	    messages.append({"role": "user", "content": message})

	    response = ""

	    # Get the model's response using streamed chat completion
	    for response_chunk in client.chat_completion(
	        messages,
	        max_tokens=max_tokens,
	        stream=True,
	        temperature=temperature,
	        top_p=top_p,
	    ):
	        choices = response_chunk.choices
	        if not choices:
	            # Some stream chunks carry no choices at all; nothing to append.
	            continue
	        token = choices[0].delta.content
	        if token is None:
	            # Chunks without text (e.g. the closing chunk) would otherwise
	            # break the string concatenation below.
	            continue
	        response += token
	        yield response

	# Create Gradio Blocks layout for Hugging Face Spaces
	with gr.Blocks() as demo:
	    with gr.Row():
	        user_input = gr.Textbox(label="User Input")
	        # Seed the state with an empty list (gr.State() alone defaults to
	        # None) so respond() always receives an iterable history.
	        # NOTE: the submit handler below only writes to `output`; nothing
	        # updates `history`, so it stays empty between turns.
	        history = gr.State([])
	    with gr.Row():
	        max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
	        temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
	        top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
	    with gr.Row():
	        output = gr.Textbox(label="Model Output")

	    # Set up the chatbot functionality: pressing Enter in the input box
	    # calls respond() and streams each yielded string into `output`.
	    user_input.submit(
	        respond,
	        [user_input, history, max_tokens_slider, temperature_slider, top_p_slider],
	        output,
	    )

	# Launch the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()