##### `๐Ÿ’โ€ Open_Assistant.py` ##### https://huggingface.co/spaces/olivierdehaene/chat-llm-streaming/blob/main/README.md ##### https://huggingface.co/OpenAssistant/oasst-sft-1-pythia-12b ##### Please reach out to ben@benbox.org for any questions #### Loading needed Python libraries import streamlit as st import os from text_generation import Client, InferenceAPIClient from text_generation import InferenceAPIClient st.header('๐Ÿ’โ€ Open Assistant LLM') st.write('This is the first iteration English supervised-fine-tuning (SFT) model of the Open-Assistant project. It is based on a Pythia 12B that was fine-tuned on ~22k human demonstrations of assistant conversations collected through the https://open-assistant.io/ human feedback web app before March 7, 2023.') st.write('Question: :green[Why is the sky blue?]') client = InferenceAPIClient("OpenAssistant/oasst-sft-1-pythia-12b") text = client.generate("<|prompter|>Why is the sky blue?<|endoftext|><|assistant|>").generated_text st.write('Answer: :green[' + str(text) + ']') # Token Streaming #text = "" #for response in client.generate_stream("<|prompter|>Why is the sky blue?<|endoftext|><|assistant|>"): # if not response.token.special: # print(response.token.text) # text += response.token.text #st.write(text) # # openchat_preprompt = ( # "\n: Hi!\n: My name is Bot, model version is 0.15, part of an open-source kit for " # "fine-tuning new bots! I was created by Together, LAION, and Ontocord.ai and the open-source " # "community. I am not human, not evil and not alive, and thus have no thoughts and feelings, " # "but I am programmed to be helpful, polite, honest, and friendly.\n" # ) # # # def get_client(model: str): # if model == "togethercomputer/GPT-NeoXT-Chat-Base-20B": # return Client(os.getenv("OPENCHAT_API_URL")) # return InferenceAPIClient(model, token = os.getenv("HF_TOKEN", None)) # # # def get_usernames(model: str): # """ # Returns: # (str, str, str, str): pre-prompt, username, bot name, separator # """ # if model == "OpenAssistant/oasst-sft-1-pythia-12b": # return "", "<|prompter|>", "<|assistant|>", "<|endoftext|>" # if model == "togethercomputer/GPT-NeoXT-Chat-Base-20B": # return openchat_preprompt, ": ", ": ", "\n" # return "", "User: ", "Assistant: ", "\n" # # # def predict( # model: str, # inputs: str, # typical_p: float, # top_p: float, # temperature: float, # top_k: int, # repetition_penalty: float, # watermark: bool, # chatbot, # history, # ): # client = get_client(model) # preprompt, user_name, assistant_name, sep = get_usernames(model) # # history.append(inputs) # # past = [] # for data in chatbot: # user_data, model_data = data # # if not user_data.startswith(user_name): # user_data = user_name + user_data # if not model_data.startswith(sep + assistant_name): # model_data = sep + assistant_name + model_data # # past.append(user_data + model_data.rstrip() + sep) # # if not inputs.startswith(user_name): # inputs = user_name + inputs # # total_inputs = preprompt + "".join(past) + inputs + sep + assistant_name.rstrip() # # partial_words = "" # # if model == "OpenAssistant/oasst-sft-1-pythia-12b": # iterator = client.generate_stream( # total_inputs, # typical_p = typical_p, # truncate = 1000, # watermark = watermark, # max_new_tokens = 500, # ) # else: # iterator = client.generate_stream( # total_inputs, # top_p = top_p if top_p < 1.0 else None, # top_k = top_k, # truncate = 1000, # repetition_penalty = repetition_penalty, # watermark = watermark, # temperature = temperature, # max_new_tokens = 500, # stop_sequences = 
#### Reference implementation from the chat-llm-streaming Space, kept commented out
#### (it targets Gradio and would additionally need `import gradio as gr`)
#
# openchat_preprompt = (
#     "\n<human>: Hi!\n<bot>: My name is Bot, model version is 0.15, part of an open-source kit for "
#     "fine-tuning new bots! I was created by Together, LAION, and Ontocord.ai and the open-source "
#     "community. I am not human, not evil and not alive, and thus have no thoughts and feelings, "
#     "but I am programmed to be helpful, polite, honest, and friendly.\n"
# )
#
#
# def get_client(model: str):
#     if model == "togethercomputer/GPT-NeoXT-Chat-Base-20B":
#         return Client(os.getenv("OPENCHAT_API_URL"))
#     return InferenceAPIClient(model, token = os.getenv("HF_TOKEN", None))
#
#
# def get_usernames(model: str):
#     """
#     Returns:
#         (str, str, str, str): pre-prompt, username, bot name, separator
#     """
#     if model == "OpenAssistant/oasst-sft-1-pythia-12b":
#         return "", "<|prompter|>", "<|assistant|>", "<|endoftext|>"
#     if model == "togethercomputer/GPT-NeoXT-Chat-Base-20B":
#         return openchat_preprompt, "<human>: ", "<bot>: ", "\n"
#     return "", "User: ", "Assistant: ", "\n"
#
#
# def predict(
#     model: str,
#     inputs: str,
#     typical_p: float,
#     top_p: float,
#     temperature: float,
#     top_k: int,
#     repetition_penalty: float,
#     watermark: bool,
#     chatbot,
#     history,
# ):
#     client = get_client(model)
#     preprompt, user_name, assistant_name, sep = get_usernames(model)
#
#     history.append(inputs)
#
#     past = []
#     for data in chatbot:
#         user_data, model_data = data
#
#         if not user_data.startswith(user_name):
#             user_data = user_name + user_data
#         if not model_data.startswith(sep + assistant_name):
#             model_data = sep + assistant_name + model_data
#
#         past.append(user_data + model_data.rstrip() + sep)
#
#     if not inputs.startswith(user_name):
#         inputs = user_name + inputs
#
#     total_inputs = preprompt + "".join(past) + inputs + sep + assistant_name.rstrip()
#
#     partial_words = ""
#
#     if model == "OpenAssistant/oasst-sft-1-pythia-12b":
#         iterator = client.generate_stream(
#             total_inputs,
#             typical_p = typical_p,
#             truncate = 1000,
#             watermark = watermark,
#             max_new_tokens = 500,
#         )
#     else:
#         iterator = client.generate_stream(
#             total_inputs,
#             top_p = top_p if top_p < 1.0 else None,
#             top_k = top_k,
#             truncate = 1000,
#             repetition_penalty = repetition_penalty,
#             watermark = watermark,
#             temperature = temperature,
#             max_new_tokens = 500,
#             stop_sequences = [user_name.rstrip(), assistant_name.rstrip()],
#         )
#
#     for i, response in enumerate(iterator):
#         if response.token.special:
#             continue
#
#         partial_words = partial_words + response.token.text
#         if partial_words.endswith(user_name.rstrip()):
#             partial_words = partial_words.rstrip(user_name.rstrip())
#         if partial_words.endswith(assistant_name.rstrip()):
#             partial_words = partial_words.rstrip(assistant_name.rstrip())
#
#         if i == 0:
#             history.append(" " + partial_words)
#         elif response.token.text not in user_name:
#             history[-1] = partial_words
#
#         chat = [
#             (history[i].strip(), history[i + 1].strip())
#             for i in range(0, len(history) - 1, 2)
#         ]
#         yield chat, history
#
#
# def reset_textbox():
#     return gr.update(value = "")
#
#
# def radio_on_change(
#     value: str,
#     disclaimer,
#     typical_p,
#     top_p,
#     top_k,
#     temperature,
#     repetition_penalty,
#     watermark,
# ):
#     if value == "OpenAssistant/oasst-sft-1-pythia-12b":
#         typical_p = typical_p.update(value = 0.2, visible = True)
#         top_p = top_p.update(visible = False)
#         top_k = top_k.update(visible = False)
#         temperature = temperature.update(visible = False)
#         disclaimer = disclaimer.update(visible = False)
#         repetition_penalty = repetition_penalty.update(visible = False)
#         watermark = watermark.update(False)
#     elif value == "togethercomputer/GPT-NeoXT-Chat-Base-20B":
#         typical_p = typical_p.update(visible = False)
#         top_p = top_p.update(value = 0.25, visible = True)
#         top_k = top_k.update(value = 50, visible = True)
#         temperature = temperature.update(value = 0.6, visible = True)
#         repetition_penalty = repetition_penalty.update(value = 1.01, visible = True)
#         watermark = watermark.update(False)
#         disclaimer = disclaimer.update(visible = True)
#     else:
#         typical_p = typical_p.update(visible = False)
#         top_p = top_p.update(value = 0.95, visible = True)
#         top_k = top_k.update(value = 4, visible = True)
#         temperature = temperature.update(value = 0.5, visible = True)
#         repetition_penalty = repetition_penalty.update(value = 1.03, visible = True)
#         watermark = watermark.update(True)
#         disclaimer = disclaimer.update(visible = False)
#     return (
#         disclaimer,
#         typical_p,
#         top_p,
#         top_k,
#         temperature,
#         repetition_penalty,
#         watermark,
#     )
#
#
# title = """<h1 align="center">🔥Large Language Model API 🚀Streaming🚀</h1>"""
# description = """Language models can be conditioned to act like dialogue agents through a conversational prompt that typically takes the form:
# ```
# User: <utterance>
# Assistant: <utterance>
# User: <utterance>
# Assistant: <utterance>
# ...
# ```
# In this app, you can explore the outputs of multiple LLMs when prompted in this way.
# """
#
# openchat_disclaimer = """
# Check out the official OpenChatKit feedback app for the full experience.
# """ # # with gr.Blocks( # css = """#col_container {margin-left: auto; margin-right: auto;} # #chatbot {height: 520px; overflow: auto;}""" # ) as demo: # gr.HTML(title) # with gr.Column(elem_id = "col_container"): # model = gr.Radio( # value = "OpenAssistant/oasst-sft-1-pythia-12b", # choices = [ # "OpenAssistant/oasst-sft-1-pythia-12b", # # "togethercomputer/GPT-NeoXT-Chat-Base-20B", # "google/flan-t5-xxl", # "google/flan-ul2", # "bigscience/bloom", # "bigscience/bloomz", # "EleutherAI/gpt-neox-20b", # ], # label = "Model", # interactive = True, # ) # # chatbot = gr.Chatbot(elem_id = "chatbot") # inputs = gr.Textbox( # placeholder = "Hi there!", label = "Type an input and press Enter" # ) # disclaimer = gr.Markdown(openchat_disclaimer, visible = False) # state = gr.State([]) # b1 = gr.Button() # # with gr.Accordion("Parameters", open = False): # typical_p = gr.Slider( # minimum = -0, # maximum = 1.0, # value = 0.2, # step = 0.05, # interactive = True, # label = "Typical P mass", # ) # top_p = gr.Slider( # minimum = -0, # maximum = 1.0, # value = 0.25, # step = 0.05, # interactive = True, # label = "Top-p (nucleus sampling)", # visible = False, # ) # temperature = gr.Slider( # minimum = -0, # maximum = 5.0, # value = 0.6, # step = 0.1, # interactive = True, # label = "Temperature", # visible = False, # ) # top_k = gr.Slider( # minimum = 1, # maximum = 50, # value = 50, # step = 1, # interactive = True, # label = "Top-k", # visible = False, # ) # repetition_penalty = gr.Slider( # minimum = 0.1, # maximum = 3.0, # value = 1.03, # step = 0.01, # interactive = True, # label = "Repetition Penalty", # visible = False, # ) # watermark = gr.Checkbox(value = False, label = "Text watermarking") # # model.change( # lambda value: radio_on_change( # value, # disclaimer, # typical_p, # top_p, # top_k, # temperature, # repetition_penalty, # watermark, # ), # inputs = model, # outputs = [ # disclaimer, # typical_p, # top_p, # top_k, # temperature, # repetition_penalty, # watermark, # ], # ) # # inputs.submit( # predict, # [ # model, # inputs, # typical_p, # top_p, # temperature, # top_k, # repetition_penalty, # watermark, # chatbot, # state, # ], # [chatbot, state], # ) # b1.click( # predict, # [ # model, # inputs, # typical_p, # top_p, # temperature, # top_k, # repetition_penalty, # watermark, # chatbot, # state, # ], # [chatbot, state], # ) # b1.click(reset_textbox, [], [inputs]) # inputs.submit(reset_textbox, [], [inputs]) # # gr.Markdown(description) # demo.queue(concurrency_count = 16).launch(debug = True)