# NOTE: `spaces` is deliberately kept as the very first import, matching the
# original import order.
import spaces
import gradio as gr
from huggingface_hub import InferenceClient

# For more information on `huggingface_hub` Inference API support, please
# check the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
from vptq.app_utils import get_chat_loop_generator

# Checkpoints offered in the "Models" dropdown; the first entry is the default.
model_list = [
    "VPTQ-community/Meta-Llama-3.1-8B-Instruct-v12-k65536-4096-woft",
    "VPTQ-community/Meta-Llama-3.1-70B-Instruct-v8-k32768-0-woft",
    "VPTQ-community/Qwen2.5-7B-Instruct-v8-k256-256-woft",
    "VPTQ-community/Qwen2.5-14B-Instruct-v8-k256-256-woft",
    "VPTQ-community/Qwen2.5-32B-Instruct-v16-k65536-65536-woft",
    "VPTQ-community/Qwen2.5-72B-Instruct-v8-k65536-0-woft",
]

# Module-level state shared by both callbacks: the name of the currently
# selected model and the chat generator built for it.
current_model_g = model_list[0]
chat_completion = get_chat_loop_generator(current_model_g)


@spaces.GPU
def update_title_and_chatmodel(model):
    """Switch the active chat model and return its name for display.

    The chat generator is rebuilt only when the selection differs from the
    model that is currently active.

    Args:
        model: Value emitted by the model dropdown; coerced to ``str``.

    Returns:
        The selected model name as a string (shown in the title Markdown).
    """
    global chat_completion
    global current_model_g

    model = str(model)
    if model != current_model_g:
        current_model_g = model
        chat_completion = get_chat_loop_generator(current_model_g)
    return model


@spaces.GPU
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply for a chat turn.

    Builds a role/content message list from the system prompt, the prior
    ``(user, assistant)`` turns, and the new user message, then forwards it
    to the module-level ``chat_completion`` callable.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty/falsy entries on either side are skipped.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.
        top_p: Forwarded to ``chat_completion`` as ``top_p``.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is named `token` (not `message`) so that it does not
    # shadow the `message` parameter.
    for token in chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        response += token
        # Yield the whole partial reply, not just the newest chunk.
        yield response


# NOTE(review): `css` is defined but never passed to `gr.Blocks` in this
# file -- confirm whether it was meant to be applied.
css = """
h1 {
    text-align: center;
    display: block;
}
"""

# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
# NOTE(review): this Chatbot is only referenced by the commented-out
# `chatbot=` argument below.
chatbot = gr.Chatbot(label="Gradio ChatInterface")

with gr.Blocks() as demo:
    with gr.Column(scale=1):
        title_output = gr.Markdown("Please select a model to run")
        chat_demo = gr.ChatInterface(
            respond,
            # chatbot=chatbot,
            additional_inputs_accordion=gr.Accordion(
                label="⚙️ Parameters", open=False, render=False
            ),
            fill_height=False,
            # Order must match respond()'s parameters after (message, history).
            additional_inputs=[
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p (nucleus sampling)",
                ),
            ],
        )
        model_select = gr.Dropdown(
            choices=model_list,
            label="Models",
            value=model_list[0],
        )
        # Changing the dropdown swaps the model and updates the title text.
        model_select.change(
            update_title_and_chatmodel,
            inputs=[model_select],
            outputs=title_output,
        )

if __name__ == "__main__":
    demo.launch()