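"""Gradio chat demo for Qwen2.5-32B-Instruct.

Keeps the conversation in Gradio state, forwards each query to the Hugging
Face Inference API with `requests`, and appends every exchange to a local
logs.txt file.
"""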
import os
from typing import List, Optional, Tuple, Dict

import gradio as gr
import requests
from dashscope.api_entities.dashscope_response import Role  # role-name constants: 'system', 'user', 'assistant'


# Configuration
default_system = 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.'
HF_TOKEN = os.getenv('HF_TOKEN')  # Hugging Face API token, read from the environment
API_URL = 'https://api-inference.huggingface.co/models/Qwen/Qwen2.5-32B-Instruct'

# Type aliases
History = List[Tuple[str, str]]
Messages = List[Dict[str, str]]

# Function to log chat history to logs.txt
def log_history_to_file(query: str, response: str, file_path: str = "logs.txt"):
    with open(file_path, "a", encoding="utf-8") as f:
        f.write(f"User: {query}\n")
        f.write(f"Assistant: {response}\n\n")

# Clear the input box and chat history
def clear_session() -> Tuple[str, History]:
    return '', []

# Apply a new system prompt (falling back to the default) and clear history
def modify_system_session(system: str) -> Tuple[str, str, History]:
    if not system:
        system = default_system
    return system, system, []

# Convert history to messages format
def history_to_messages(history: History, system: str) -> Messages:
    messages = [{'role': Role.SYSTEM, 'content': system}]
    for h in history:
        messages.append({'role': Role.USER, 'content': h[0]})
        messages.append({'role': Role.ASSISTANT, 'content': h[1]})
    return messages

# Convert messages back to history format
def messages_to_history(messages: Messages) -> Tuple[str, History]:
    assert messages[0]['role'] == Role.SYSTEM
    system = messages[0]['content']
    history = []
    for q, r in zip(messages[1::2], messages[2::2]):
        history.append((q['content'], r['content']))
    return system, history
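# The two converters are inverses of each other, e.g. (hypothetical turn):
#   msgs = history_to_messages([('Hi', 'Hello!')], default_system)
#   messages_to_history(msgs)  # -> (default_system, [('Hi', 'Hello!')])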

# Main chat handler: takes the current query, history, and system prompt,
# calls the Inference API, and returns (textbox value, history, system).
def model_chat(query: Optional[str], history: Optional[History], system: str) -> Tuple[str, History, str]:
    if query is None:
        query = ''
    if history is None:
        history = []

    # Hardcoded rewrite left in for a specific number-sequence test:
    # any query containing these phrases is replaced wholesale.
    if 'next numbers' in query or 'give me numbers after' in query:
        query = "Please give me the next 10 numbers after 10, starting from 11."

    messages = history_to_messages(history, system)
    messages.append({'role': 'user', 'content': query})

    # NOTE: the serverless text-generation endpoint only documents the
    # 'inputs' and 'parameters' fields; the extra 'history' key is most
    # likely ignored, so earlier turns may never reach the model.
    payload = {"inputs": query, "parameters": {"max_new_tokens": 150}, "history": messages}
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    try:
        response = requests.post(API_URL, json=payload, headers=headers, timeout=60)

        if response.status_code == 200:
            response_data = response.json()

            # The API returns either a list of generations or a single dict
            if isinstance(response_data, list):
                response_text = response_data[0].get('generated_text', '')
            else:
                response_text = response_data.get('generated_text', '')

            # Log the exchange to file
            log_history_to_file(query, response_text)

            # Append the new turn to the history and clear the input box
            history.append((query, response_text))
            return '', history, system
        else:
            error_message = f"Error {response.status_code}: {response.json().get('error', response.text)}"
            log_history_to_file(query, error_message)
            return error_message, history, system
    except Exception as e:
        error_message = f"Exception: {str(e)}"
        log_history_to_file(query, error_message)
        return error_message, history, system
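# Direct-call example (assumes HF_TOKEN is set; values are illustrative):
#   _, history, _ = model_chat('Hello!', [], default_system)
#   print(history[-1][1])  # the assistant's reply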



# Gradio Interface Setup
with gr.Blocks() as demo:
    gr.Markdown("<center><font size=8>Qwen2.5-32B-Instruct👾</font></center>")

    with gr.Row():
        with gr.Column(scale=3):
            system_input = gr.Textbox(value=default_system, lines=1, label='System')
        with gr.Column(scale=1):
            modify_system = gr.Button("🛠️ Set system prompt and clear history", scale=2)
        system_state = gr.Textbox(value=default_system, visible=False)
    
    chatbot = gr.Chatbot(label='Qwen2.5-32B-Instruct')
    textbox = gr.Textbox(lines=1, label='Input')

    with gr.Row():
        clear_history = gr.Button("🧹 Clear history")
        submit = gr.Button("🚀 Send")

    # Link buttons to functions
    textbox.submit(model_chat,
                   inputs=[textbox, chatbot, system_state],
                   outputs=[textbox, chatbot, system_input],
                   concurrency_limit=5)

    submit.click(model_chat,
                 inputs=[textbox, chatbot, system_state],
                 outputs=[textbox, chatbot, system_input],
                 concurrency_limit=20)

    clear_history.click(fn=clear_session,
                        inputs=[],
                        outputs=[textbox, chatbot],
                        concurrency_limit=20)

    modify_system.click(fn=modify_system_session,
                        inputs=[system_input],
                        outputs=[system_state, system_input, chatbot],
                        concurrency_limit=20)

# Launch the Gradio interface; max_threads kept low for the free plan
demo.queue(api_open=False)
demo.launch(max_threads=20)