"""Gradio demo comparing a base LLM prompted with URIAL against its aligned counterpart.

The same user message is sent to both models; the left chat pane streams the
base model's completion and the right pane streams the aligned model's reply.
"""
import datetime
import logging
import os
import urllib.request
from typing import List

import gradio as gr
from openai import OpenAI

from constant import js_code_label, my_css, HEADER_MD, BASE_TO_ALIGNED, MODELS
from utils import model_name_mapping, urial_template, openai_base_request, chat_template, openai_chat_request

# add logging info to console
logging.basicConfig(level=logging.INFO)

URIAL_VERSION = "inst_1k_v4.help"
URIAL_URL = f"https://raw.githubusercontent.com/Re-Align/URIAL/main/urial_prompts/{URIAL_VERSION}.txt"

# Fetch the URIAL in-context prompt once at import time. The context manager
# closes the HTTP response, and the timeout keeps startup from hanging forever.
with urllib.request.urlopen(URIAL_URL, timeout=30) as _urial_response:
    urial_prompt = _urial_response.read().decode("utf-8")
urial_prompt = urial_prompt.replace("```", '"""')  # new version of URIAL uses """ instead of ```

STOP_STRS = ['"""', '# Query:', '# Answer:']

# Requests allowed per client address per day when no personal API key is used.
DAILY_REQUEST_LIMIT = 100
# Only keys of exactly this length are forwarded to the backend.
API_KEY_LENGTH = 64

addr_limit_counter = {}
LAST_UPDATE_TIME = datetime.datetime.now()

models = MODELS


def _extract_token(msg):
    """Return the text carried by one streamed chunk, or None if it has none."""
    if not msg.choices:
        # Some chunks carry no choices at all; there is nothing to append.
        return None
    choice = msg.choices[0]
    if not hasattr(choice, "delta"):
        # Completion-style chunk: the text sits directly on the choice.
        return choice.text
    delta = choice.delta
    # The delta may be a plain mapping or an attribute-style object.
    if isinstance(delta, dict):
        return delta.get("content")
    return getattr(delta, "content", None)


def _hide_partial_stop(text):
    """Return text without a trailing, partially streamed `\\n\"\"\"` stop marker."""
    if text.endswith('\n""'):
        return text[:-2]
    if text.endswith('\n"'):
        return text[:-1]
    return text


def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    rp,
    model_name,
    model_type,
    api_key,
    request: gr.Request,
):
    """Stream a model reply for `message` as successive chatbot histories.

    Args:
        message: The user's new prompt.
        history: Previous (user, assistant) turns shown in the chatbot.
        max_tokens, temperature, top_p, rp: Sampling parameters forwarded to
            the request helper (`rp` is passed as `repetition_penalty`).
        model_name: Display name, translated with `model_name_mapping`.
        model_type: "base" (URIAL prompt + completion request) or "aligned"
            (chat request).
        api_key: Optional user key; ignored unless it is exactly
            `API_KEY_LENGTH` characters long.
        request: The Gradio request, used for the client address.

    Yields:
        `history` plus one `(message, partial_response)` tuple, growing as
        tokens arrive. When the daily limit is hit, a single history whose
        last reply is the limit notice.

    Raises:
        ValueError: If `model_type` is neither "base" nor "aligned".
    """
    global LAST_UPDATE_TIME, addr_limit_counter

    if model_type not in ("base", "aligned"):
        raise ValueError(f"model_type must be 'base' or 'aligned', got {model_type!r}")

    history = history or []

    # Anything that is not a well-formed key is dropped; the UI placeholder
    # says a blank key falls back to the shared key (handled by the helpers).
    if not (api_key and len(api_key) == API_KEY_LENGTH):
        api_key = None

    # if already 24 hours passed, reset the counter
    now = datetime.datetime.now()
    if now - LAST_UPDATE_TIME > datetime.timedelta(days=1):
        addr_limit_counter = {}
        LAST_UPDATE_TIME = now

    client = getattr(request, "client", None)
    host_addr = client.host if client is not None else "unknown"
    uses_shared_key = api_key is None

    # The limit only protects the shared key: callers with their own key are
    # not throttled, which is what the limit notice tells them to do.
    if uses_shared_key and addr_limit_counter.get(host_addr, 0) >= DAILY_REQUEST_LIMIT:
        # This function is a generator, so the notice must be yielded to be
        # displayed; a returned value would be silently discarded.
        limit_notice = (
            f"You have reached the limit of {DAILY_REQUEST_LIMIT} requests for today. "
            "Please use your own API key."
        )
        yield history + [(message, limit_notice)]
        return

    _model_name = model_name_mapping(model_name)
    if model_type == "base":
        prompt = urial_template(urial_prompt, history, message)
        infer_request = openai_base_request(
            prompt=prompt,
            model=_model_name,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            repetition_penalty=rp,
            stop=STOP_STRS,
            api_key=api_key,
        )
    else:
        messages = chat_template(history, message)
        infer_request = openai_chat_request(
            messages=messages,
            model=_model_name,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            repetition_penalty=rp,
            stop=STOP_STRS,
            api_key=api_key,
        )

    if uses_shared_key:
        addr_limit_counter[host_addr] = addr_limit_counter.get(host_addr, 0) + 1

    logging.info(f"Requesting chat completion from OpenAI API with model {_model_name}")
    logging.info(
        "requests today from this client: %s; Last update time: %s;",
        addr_limit_counter.get(host_addr, 0),
        LAST_UPDATE_TIME,
    )

    response = ""
    try:
        for msg in infer_request:
            token = _extract_token(msg)
            if token is None:
                continue
            # Base models keep generating past the answer, so cut the stream
            # as soon as any stop marker would appear in the text.
            if model_type == "base" and any(stop in response + token for stop in STOP_STRS):
                break
            response += token
            # Keep the raw text for stop detection; only the displayed copy
            # hides a half-streamed `"""` marker.
            shown = _hide_partial_stop(response) if model_type == "base" else response
            yield history + [(message, shown)]
    finally:
        # Release the streaming request on early stop or cancellation too.
        close_stream = getattr(infer_request, "close", None)
        if callable(close_stream):
            close_stream()


def load_models(base_model_name):
    """Return updates for both chat labels and the aligned-model box.

    The aligned model is looked up in `BASE_TO_ALIGNED`; an unknown base
    model name raises `KeyError`.
    """
    logging.info("base_model_name=%s", base_model_name)
    aligned_model_name = BASE_TO_ALIGNED[base_model_name]
    return (
        gr.update(label=f"Chat with Base LLM: {base_model_name}"),
        gr.update(label=f"Chat with Aligned LLM: {aligned_model_name}"),
        gr.update(value=aligned_model_name, interactive=False),
    )


def clear_fn():
    """Reset the prompt box and both chat panes."""
    return None, None, None


# NOTE(review): the nesting of the layout below was reconstructed from the
# statement order of a whitespace-mangled source; confirm it against the
# intended layout.
with gr.Blocks(theme=gr.themes.Soft(), js=js_code_label, css=my_css) as demo:
    api_key = gr.Textbox(
        label="🔑 APIKey",
        placeholder="Enter your Together/Hyperbolic API Key. Leave it blank to use our key with limited usage.",
        type="password",
        elem_id="api_key",
        visible=False,
    )
    gr.Markdown(HEADER_MD)

    with gr.Row():
        chat_a = gr.Chatbot(height=500, label="Chat with Base LLMs via URIAL")
        chat_b = gr.Chatbot(height=500, label="Chat with Aligned LLMs")

    with gr.Group():
        with gr.Row():
            # Integer scales 3:2 give the same 1.5:1 width ratio as before;
            # Gradio expects `scale` to be an integer.
            with gr.Column(scale=3):
                message = gr.Textbox(label="Prompt", placeholder="Enter your message here")
                with gr.Row():
                    with gr.Column(scale=2):
                        with gr.Row():
                            left_model_choice = gr.Dropdown(label="Base Model", choices=models, interactive=True)
                            right_model_choice = gr.Textbox(label="Aligned Model", placeholder="xxx", visible=True)
                        with gr.Row():
                            btn = gr.Button("🚀 Chat")
                        with gr.Row():
                            stop_btn = gr.Button("⏸️ Stop")
                            clear_btn = gr.Button("🔁 Clear")
                        with gr.Row():
                            gr.Markdown(">> - We thank for the support of Llama-3.1-405B from [Hyperbolic AI](https://hyperbolic.xyz/). ")

            with gr.Column(scale=2):
                with gr.Accordion("⚙️ Params for **Base** LLM", open=True):
                    with gr.Row():
                        max_tokens_1 = gr.Slider(label="Max tokens", value=256, minimum=0, maximum=2048, step=16, interactive=True, visible=True)
                        temperature_1 = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                    with gr.Row():
                        top_p_1 = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                        rp_1 = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.1)
                with gr.Accordion("⚙️ Params for **Aligned** LLM", open=True):
                    with gr.Row():
                        max_tokens_2 = gr.Slider(label="Max tokens", value=256, minimum=0, maximum=2048, step=16, interactive=True, visible=True)
                        temperature_2 = gr.Slider(label="Temperature", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                    with gr.Row():
                        top_p_2 = gr.Slider(label="Top-P", step=0.01, minimum=0.01, maximum=1.0, value=0.9)
                        rp_2 = gr.Slider(label="Repetition Penalty", step=0.1, minimum=0.1, maximum=2.0, value=1.0)

    left_model_choice.value = "Llama-3.1-405B-FP8"
    right_model_choice.value = "Llama-3.1-405B-Instruct-FP16"

    left_model_choice.change(load_models, [left_model_choice], [chat_a, chat_b, right_model_choice])

    # Hidden textboxes carry the constant `model_type` argument for each pane.
    model_type_left = gr.Textbox(visible=False, value="base")
    model_type_right = gr.Textbox(visible=False, value="aligned")

    # One click fans out to both models; each event streams into its own pane.
    go1 = btn.click(respond, [message, chat_a, max_tokens_1, temperature_1, top_p_1, rp_1, left_model_choice, model_type_left, api_key], chat_a)
    go2 = btn.click(respond, [message, chat_b, max_tokens_2, temperature_2, top_p_2, rp_2, right_model_choice, model_type_right, api_key], chat_b)

    stop_btn.click(None, None, None, cancels=[go1, go2])
    clear_btn.click(clear_fn, None, [message, chat_a, chat_b])


if __name__ == "__main__":
    demo.launch(show_api=False)