# Qwen2.5-72B-Instruct Gradio demo (app.py)
import os
import gradio as gr
from http import HTTPStatus
from typing import Dict, Generator, List, Optional, Tuple
import dashscope
from dashscope import Generation
from dashscope.api_entities.dashscope_response import Role
# Configuration
default_system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'
dashscope.api_key = os.getenv('HF_TOKEN')  # DashScope API key, read from the HF_TOKEN environment variable (Space secret)
# Typing definitions
History = List[Tuple[str, str]]
Messages = List[Dict[str, str]]
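# Illustrative shapes for the two aliases above (values are made up):
#   History:  [('user turn', 'assistant reply'), ...]
#   Messages: [{'role': 'system', 'content': '...'},
#              {'role': 'user', 'content': '...'},
#              {'role': 'assistant', 'content': '...'}, ...]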
# Append one user/assistant exchange to a plain-text log file
def log_history_to_file(query: str, response: str, file_path: str = "logs.txt") -> None:
    with open(file_path, "a") as f:
        f.write(f"User: {query}\n")
        f.write(f"Assistant: {response}\n\n")
# Reset the input box and chat history
def clear_session() -> Tuple[str, History]:
    return '', []
# Apply a new system prompt (falling back to the default) and clear the chat;
# the three return values feed the system_state, system_input, and chatbot outputs
def modify_system_session(system: str) -> Tuple[str, str, History]:
    if not system:
        system = default_system
    return system, system, []
# Convert (user, assistant) history tuples to the DashScope messages format
def history_to_messages(history: History, system: str) -> Messages:
    messages = [{'role': Role.SYSTEM, 'content': system}]
    for h in history:
        messages.append({'role': Role.USER, 'content': h[0]})
        messages.append({'role': Role.ASSISTANT, 'content': h[1]})
    return messages
# Convert a messages list back to (system, history) for the Gradio chatbot
def messages_to_history(messages: Messages) -> Tuple[str, History]:
    assert messages[0]['role'] == Role.SYSTEM
    system = messages[0]['content']
    history = []
    for q, r in zip(messages[1::2], messages[2::2]):
        history.append((q['content'], r['content']))
    return system, history
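# Illustrative round trip through the two helpers above (not executed;
# the conversation content is made up):
#
#   msgs = history_to_messages([('Hi', 'Hello! How can I help?')], default_system)
#   # msgs == [{'role': Role.SYSTEM, 'content': default_system},
#   #          {'role': Role.USER, 'content': 'Hi'},
#   #          {'role': Role.ASSISTANT, 'content': 'Hello! How can I help?'}]
#   messages_to_history(msgs)
#   # == (default_system, [('Hi', 'Hello! How can I help?')])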
# Main chat function: streams the model's reply as a generator
def model_chat(query: Optional[str], history: Optional[History],
               system: str) -> Generator[Tuple[str, History, str], None, None]:
    if query is None:
        query = ''
    if history is None:
        history = []
    messages = history_to_messages(history, system)
    messages.append({'role': Role.USER, 'content': query})
    # Request generation with controlled parameters
    gen = Generation.call(
        model='qwen2.5-72b-instruct',
        messages=messages,
        result_format='message',
        stream=True,
        max_tokens=150  # Limit response length to save resources
    )
    for response in gen:
        if response.status_code == HTTPStatus.OK:
            role = response.output.choices[0].message.role
            # With incremental_output left at its default, each streamed chunk
            # carries the cumulative response text so far
            response_text = response.output.choices[0].message.content
            system, history = messages_to_history(
                messages + [{'role': role, 'content': response_text}])
            # Log only the finished reply, not every intermediate chunk
            if response.output.choices[0].finish_reason == 'stop':
                log_history_to_file(query, response_text)
            yield '', history, system
        else:
            raise ValueError('Request id: %s, Status code: %s, error code: %s, error message: %s' % (
                response.request_id, response.status_code,
                response.code, response.message
            ))
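# Minimal sketch of driving model_chat outside Gradio (assumes a valid
# DashScope key is set; the printed reply is hypothetical):
#
#   for _, history, _ in model_chat('What is Qwen?', [], default_system):
#       pass
#   print(history[-1][1])  # final assistant reply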
# Gradio interface setup
with gr.Blocks() as demo:
    gr.Markdown("<center><font size=8>Qwen2.5-72B-Instruct👾</center>")
    with gr.Row():
        with gr.Column(scale=3):
            system_input = gr.Textbox(value=default_system, lines=1, label='System')
        with gr.Column(scale=1):
            modify_system = gr.Button("🛠️ Set system prompt and clear history", scale=2)
    system_state = gr.Textbox(value=default_system, visible=False)
    chatbot = gr.Chatbot(label='Qwen2.5-72B-Instruct')
    textbox = gr.Textbox(lines=1, label='Input')
    with gr.Row():
        clear_history = gr.Button("🧹 Clear history")
        submit = gr.Button("🚀 Send")
    # Wire the controls to their handlers (each capped at 5 concurrent calls)
    textbox.submit(model_chat,
                   inputs=[textbox, chatbot, system_state],
                   outputs=[textbox, chatbot, system_input],
                   concurrency_limit=5)
    submit.click(model_chat,
                 inputs=[textbox, chatbot, system_state],
                 outputs=[textbox, chatbot, system_input],
                 concurrency_limit=5)
    clear_history.click(fn=clear_session,
                        inputs=[],
                        outputs=[textbox, chatbot],
                        concurrency_limit=5)
    modify_system.click(fn=modify_system_session,
                        inputs=[system_input],
                        outputs=[system_state, system_input, chatbot],
                        concurrency_limit=5)
# Launch the Gradio interface with a queue and a reduced thread count for the free-tier plan
demo.queue(api_open=False)
demo.launch(max_threads=10)
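# Local usage sketch (assumes a valid DashScope API key exported as HF_TOKEN):
#
#   HF_TOKEN=sk-... python app.py
#   # Gradio serves on http://127.0.0.1:7860 by default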