Spaces:

mikeee
/

qwen-2.5-72b-instruct

Running

File size: 2,368 Bytes

10e1692
 
f956d70
10e1692
 
05bde1f
10e1692
 
 
 
05bde1f
 
a6c3106
05bde1f
10e1692
05bde1f
 
 
 
10e1692
 
05bde1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1a0824
 
05bde1f
 
 
 
 
 
d1a0824
 
 
05bde1f
d1a0824
 
 
 
05bde1f
 
 
a6c3106
 
 
 
 
 
 
 
05bde1f
 
 
 
30c43ba
369961f
05bde1f
 
 
 
 
 
 
 
 
 
 
 
8dd48d6
 
30c43ba
05bde1f

"""
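Gradio chat demo that streams replies from Qwen/Qwen2.5-72B-Instruct through
huggingface_hub's InferenceClient.

See also (cf.):
- https://huggingface.co/spaces/Nymbo/Qwen-2.5-72B-Instruct/blob/main/app.py
- https://huggingface.co/spaces/prithivMLmods/Llama-3.1-8B-Instruct/blob/main/app.py
- https://github.com/huggingface/huggingface-llama-recipes/blob/main/api_inference/inference-api.ipynb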
"""
import gradio as gr

# from openai import OpenAI
from huggingface_hub import InferenceClient

import os

# Disabled: explicit token lookup that the OpenAI-client variant below would use.
# ACCESS_TOKEN = os.getenv("HF_TOKEN")

# Disabled OpenAI-compatible client setup, parked inside a throwaway string so
# it is never executed. Note that the "# " before the closing quotes is part of
# the string, not a comment.
_ = """
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
# """
# Active client, shared by every call to respond(). It is constructed with no
# arguments, so neither a model nor a token is passed here.
# NOTE(review): presumably relies on huggingface_hub's own credential discovery
# (e.g. an HF_TOKEN environment variable) — confirm.
client = InferenceClient()

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message`` given the prior ``history``.

    Args:
        message: The new user message to answer.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; an
            empty element of a pair is skipped.
        system_message: Content of the leading system message (sent even when
            it is an empty string).
        max_tokens: Forwarded as ``max_tokens`` to the completion request.
        temperature: Forwarded as ``temperature`` to the completion request.
        top_p: Forwarded as ``top_p`` to the completion request.

    Yields:
        The response text accumulated so far, once per streamed chunk that
        carries a choice. If the request or the stream raises, the exception
        text is yielded instead so the error is shown in the chat.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-72B-Instruct",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        # Use a dedicated loop name: the original reused ``message`` and
        # clobbered the parameter of the same name.
        for chunk in stream:
            if not chunk.choices:
                # Nothing to read from a chunk without choices.
                continue
            # ``delta.content`` may be None (e.g. role-only or finishing
            # chunks); treat it as empty text instead of letting ``+=`` raise
            # a TypeError that would replace the answer with an error string.
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as the reply.
        yield str(e)
        
# Conversation pane handed to the chat interface below.
chatbot = gr.Chatbot(height=600)

# Page styling: cap the container width, centre h1 headings, hide the footer.
css = """
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
"""

# Ceiling, and also the default, of the "Max new tokens" slider.
# NOTE(review): looks like half of a 32768-token window minus 500 tokens of
# headroom — confirm the intent of these numbers.
_token_ceiling = 32768 // 2 - 500

# Extra controls, listed in the same order as respond's trailing parameters:
# system_message, max_tokens, temperature, top_p.
_extra_controls = [
    gr.Textbox(value="", label="System message"),
    gr.Slider(
        minimum=1,
        maximum=_token_ceiling,
        value=_token_ceiling,
        step=1,
        label="Max new tokens",
    ),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
]

# Alternative themes tried previously: "allenai/gradio-theme", "Nymbo/Alyx_Theme".
demo = gr.ChatInterface(
    respond,
    chatbot=chatbot,
    additional_inputs=_extra_controls,
    css=css,
    fill_height=True,
)

if __name__ == "__main__":
    demo.launch()