Spaces:
Sleeping
Sleeping
File size: 2,765 Bytes
10e1692 f956d70 10e1692 1e5f1ea 05bde1f 10e1692 5e1fced 10e1692 4ddd8b4 1e5f1ea 05bde1f a6c3106 05bde1f e35c2e7 11f229b 05bde1f 11f229b 05bde1f 10e1692 e35c2e7 05bde1f d1a0824 05bde1f d1a0824 05bde1f a6c3106 05bde1f 33f1e81 11f229b 05bde1f 30c43ba 369961f 2ba3e9d 05bde1f 8dd48d6 2414024 2ba3e9d 11f229b 8dd48d6 30c43ba 05bde1f aa2e4f9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
"""
cf https://huggingface.co/spaces/Nymbo/Qwen-2.5-72B-Instruct/blob/main/app.py
https://huggingface.co/spaces/prithivMLmods/Llama-3.1-8B-Instruct/blob/main/app.py
https://github.com/huggingface/huggingface-llama-recipes/blob/main/api_inference/inference-api.ipynb
"""
import os
import time
import gradio as gr
from openai import OpenAI
# from huggingface_hub import InferenceClient
# Set the process time zone to Asia/Shanghai. ``time.tzset()`` is only
# available on Unix, so it is guarded to keep the module importable elsewhere
# (the TZ variable is still exported either way).
os.environ.update(TZ='Asia/Shanghai')
if hasattr(time, "tzset"):
    time.tzset()

# OpenAI-compatible client pointed at the Hugging Face Inference API.
# The key is read from the HF_TOKEN environment variable; 'na' is used as a
# placeholder when it is unset (presumably so the client can still be
# constructed without a token -- requests may then be rejected by the server).
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HF_TOKEN", 'na'),
)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for ``message``, yielding the reply as it grows.

    Args:
        message: The user's newest prompt.
        history: Earlier turns of the conversation. Two layouts are accepted:
            ``(user_text, assistant_text)`` pairs, and
            ``{"role": ..., "content": ...}`` dicts (the layout used when the
            chat interface is created with ``type='messages'``).
        system_message: Content of the leading system message.
        max_tokens: Forwarded to the completion request as ``max_tokens``.
        temperature: Forwarded to the completion request as ``temperature``.
        top_p: Forwarded to the completion request as ``top_p``.

    Yields:
        The accumulated response text after each streamed chunk that carries
        content. If the request or the stream raises, the exception text is
        yielded instead so it is shown in the chat rather than crashing.
    """
    messages = [{"role": "system", "content": system_message}]

    for turn in history:
        if isinstance(turn, dict):
            # "messages" layout: forward only role/content, dropping any
            # extra keys the UI may attach to a history entry.
            role = turn.get("role")
            content = turn.get("content")
            if role and content:
                messages.append({"role": role, "content": content})
            continue

        # Pair layout: (user_text, assistant_text); either side may be empty.
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""
    try:
        stream = client.chat.completions.create(
            model="Qwen/Qwen2.5-72B-Instruct",
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            messages=messages,
        )
        for chunk in stream:
            # Some chunks (e.g. the terminating one) carry no choices or a
            # ``None`` delta; concatenating those would raise TypeError and
            # replace the whole reply with an error message.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token is None:
                continue
            response += token
            yield response
    except Exception as e:
        # Top-level boundary for the UI: surface the failure as chat text.
        yield str(e)
# Chat display component. Its ``type`` is set explicitly so that it agrees
# with the ``type='messages'`` passed to ``gr.ChatInterface`` further down;
# leaving it at the default makes the two disagree about the history format.
chatbot = gr.Chatbot(height=600, type="messages")

# Page CSS: cap the container width, centre h1 headings, hide the footer.
css = '''
.gradio-container{max-width: 1000px !important}
h1{text-align:center}
footer {
visibility: hidden
}
'''
# Upper bound (and default) for the "Max new tokens" slider.
_token_cap = 32768 // 2 - 500

# Extra controls attached to the chat interface. They are listed in the same
# order as the trailing parameters of ``respond``: system_message,
# max_tokens, temperature, top_p.
_extra_inputs = [
    gr.Textbox(value="", label="System message"),
    gr.Slider(
        minimum=1,
        maximum=_token_cap,
        value=_token_cap,
        step=1,
        label="Max new tokens",
    ),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.3, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
]

# NOTE: ``retry_btn`` was tried here and rejected with "unexpected keyword
# argument 'retry_btn'"; ``undo_btn``, ``clear_btn`` and ``stop_btn`` were
# left disabled alongside it. ``description``, ``examples`` and a custom
# ``theme`` were also considered and left disabled.
demo = gr.ChatInterface(
    respond,
    type='messages',
    additional_inputs=_extra_inputs,
    fill_height=True,
    chatbot=chatbot,
    css=css,
)

if __name__ == "__main__":
    # Start the web UI only when executed as a script.
    demo.launch()  # ssr=False