Spaces:

xianbao
/

SambaNova-fast

Running

SambaNova-fast / app.py

Xianbao QIAN

update new ui

93e4eee about 2 months ago

4.03 kB

	import gradio as gr
	import os
	from typing import Iterator
	import sambanova


	def generate(
	    message: str,
	    chat_history: list[tuple[str, str]],
	    system_message,
	    max_tokens: int = 1024,
	    temperature: float = 0.6,
	    top_p: float = 0.9,
	    top_k: int = 50,
	    repetition_penalty: float = 1.2,
	) -> Iterator[str]:
	    """Stream the assistant's reply to ``message``.

	    Builds a chat transcript (system prompt, earlier turns, then the new
	    user message) and passes it to ``sambanova.Streamer``.

	    Args:
	        message: The user's new prompt for this turn.
	        chat_history: Earlier ``(user, assistant)`` pairs; empty/falsy
	            entries in a pair are skipped.
	        system_message: Content of the leading ``system`` message.
	        max_tokens: Passed to the streamer as ``new_tokens``.
	        temperature: Passed to the streamer unchanged.
	        top_p: Passed to the streamer unchanged.
	        top_k: Passed to the streamer unchanged.
	        repetition_penalty: Accepted for interface compatibility but not
	            forwarded to the streamer.
	            NOTE(review): confirm whether ``sambanova.Streamer`` supports
	            a repetition penalty before wiring it through.

	    Yields:
	        The reply accumulated so far, once per chunk received from the
	        streamer (each yielded string contains all previous chunks).
	    """
	    conversation = [{"role": "system", "content": system_message}]

	    for user_turn, assistant_turn in chat_history:
	        if user_turn:
	            conversation.append({"role": "user", "content": user_turn})
	        if assistant_turn:
	            conversation.append({"role": "assistant", "content": assistant_turn})

	    # The history only holds completed turns, so the new prompt must be
	    # added explicitly; without it the model never sees the user's message.
	    conversation.append({"role": "user", "content": message})

	    outputs = []
	    for text in sambanova.Streamer(
	        conversation,
	        new_tokens=max_tokens,
	        temperature=temperature,
	        top_k=top_k,
	        top_p=top_p,
	    ):
	        outputs.append(text)
	        # Yield the whole reply so far, not just the latest chunk.
	        yield "".join(outputs)


	# Default position and upper bound of the "Max tokens" slider.
	DEFAULT_MAX_TOKENS = 1024
	MAX_MAX_TOKENS = 2048
	# Read from the environment (default 4096); not referenced elsewhere in
	# the visible code.
	MAX_INPUT_TOKEN_LENGTH = int(os.environ.get("MAX_INPUT_TOKEN_LENGTH", "4096"))

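	# Legacy UI configuration, superseded by the active gr.ChatInterface below.
	# It has no "System message" textbox and refers to MAX_MAX_NEW_TOKENS /
	# DEFAULT_MAX_NEW_TOKENS, which this file now names MAX_MAX_TOKENS /
	# DEFAULT_MAX_TOKENS, so it cannot be re-enabled as-is.
	# chat_interface = gr.ChatInterface(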
	# fn=generate,
	# additional_inputs=[
	# gr.Slider(
	# label="Max new tokens",
	# minimum=1,
	# maximum=MAX_MAX_NEW_TOKENS,
	# step=1,
	# value=DEFAULT_MAX_NEW_TOKENS,
	# ),
	# gr.Slider(
	# label="Temperature",
	# minimum=0.1,
	# maximum=4.0,
	# step=0.1,
	# value=0.6,
	# ),
	# gr.Slider(
	# label="Top-p (nucleus sampling)",
	# minimum=0.05,
	# maximum=1.0,
	# step=0.05,
	# value=0.9,
	# ),
	# gr.Slider(
	# label="Top-k",
	# minimum=1,
	# maximum=1000,
	# step=1,
	# value=50,
	# ),
	# gr.Slider(
	# label="Repetition penalty",
	# minimum=1.0,
	# maximum=2.0,
	# step=0.05,
	# value=1.2,
	# ),
	# ],
	# stop_btn=None,
	# fill_height=True,
	# examples=[
	# ["Which one is bigger? 4.9 or 4.11"],
	# [
	# "Can you explain briefly to me what is the Python programming language?"
	# ],
	# ["Explain the plot of Cinderella in a sentence."],
	# ["How many hours does it take a man to eat a Helicopter?"],
	# [
	# "Write a 100-word article on 'Benefits of Open-Source in AI research'"
	# ],
	# ],
	# cache_examples=False,
	# )

	# Chat widget. The additional inputs are listed in the same order as the
	# parameters of `generate` that follow `chat_history`: system_message,
	# max_tokens, temperature, top_p, top_k.
	chat_interface = gr.ChatInterface(
	    fn=generate,
	    additional_inputs=[
	        gr.Textbox(label="System message", value="You are a friendly Chatbot."),
	        gr.Slider(
	            minimum=1,
	            maximum=MAX_MAX_TOKENS,
	            value=DEFAULT_MAX_TOKENS,
	            step=1,
	            label="Max tokens",
	        ),
	        gr.Slider(minimum=0.1, maximum=4.0, value=0.6, step=0.1, label="Temperature"),
	        gr.Slider(
	            minimum=0.05,
	            maximum=1.0,
	            value=0.9,
	            step=0.05,
	            label="Top-p (nucleus sampling)",
	        ),
	        gr.Slider(minimum=1, maximum=1000, value=50, step=1, label="Top-k"),
	    ],
	    # Each example is a one-element list holding only the chat message.
	    examples=[
	        [prompt]
	        for prompt in (
	            "Which one is bigger? 4.9 or 4.11",
	            "Can you explain briefly to me what is the Python programming language?",
	            "Explain the plot of Cinderella in a sentence.",
	            "How many hours does it take a man to eat a Helicopter?",
	            "Write a 100-word article on 'Benefits of Open-Source in AI research'",
	        )
	    ],
	    cache_examples=False,
	)

	# Page layout: a heading followed by the chat widget.
	with gr.Blocks() as demo:
	    gr.Markdown("# Sambanova model inference LLAMA 405B")
	    chat_interface.render()

	if __name__ == "__main__":
	    # Enable the request queue (max_size=20), then start the app.
	    demo.queue(max_size=20)
	    demo.launch()