File size: 5,808 Bytes
1cc07e3
 
 
 
 
736c5c5
1cc07e3
 
 
 
 
 
 
 
 
ba55863
 
 
 
 
 
1cc07e3
 
 
 
 
 
f1b6383
 
49b4062
c35feec
1cc07e3
 
 
 
 
 
 
ba55863
 
 
 
 
 
 
 
 
 
 
1cc07e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba55863
1cc07e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6eb8c22
 
af79fb8
ba55863
 
6eb8c22
 
f4adc69
 
1cc07e3
 
 
 
ba55863
1cc07e3
ba55863
1cc07e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import os
import nltk 
import openai
import time
import gradio as gr
import tiktoken
from threading import Thread #线程  用于定时器

from assets.char_poses_base64 import ( #角色动作
    CHAR_IDLE_HTML, CHAR_THINKING_HTML, CHAR_TALKING_HTML)

from app_utils import (
    get_chat_history, initialize_knowledge_base, 
    text_to_speech_gen, logging, buzz_user)

global max_response_tokens
global token_limit
max_response_tokens = 500
token_limit= 15000


global FUNC_CALL #全局变量 用于判断角色动作
FUNC_CALL = 0

global BUZZ_TIMEOUT #全局变量 用于定时器
BUZZ_TIMEOUT = 60

global MESSAGES

GENERAL_RSPONSE_TRIGGERS = ["I don't understand the question.", "I don't know", "Hello, my name is", "mentioned in the context provided","I don't know the answer"]
MESSAGES = [{"role": "system", "content": "你现在是一个优秀的展览馆讲解员,你可以通过文字或语音与客户交流,你可以讲述上海老建筑和历史人物之间的关系。"}]
LOGGER = logging.getLogger('voice_agent') #日志
AUDIO_HTML = ''

# Uncomment If this is your first Run: 
nltk.download('averaged_perceptron_tagger')  #下载语料库
conv_model, voice_model = initialize_knowledge_base()  #初始化知识库

def num_tokens_from_messages(messages, model="gpt-3.5-turbo-16k"):
    encoding = tiktoken.encoding_for_model(model)
    num_tokens = 0
    for message in messages:
        num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":  # if there's a name, the role is omitted
                num_tokens += -1  # role is always required and always 1 token
    num_tokens += 2  # every reply is primed with <im_start>assistant
    return num_tokens

def idle_timer():
    global BUZZ_TIMEOUT

    while True:
        time.sleep(BUZZ_TIMEOUT)
        buzz_user()

        if BUZZ_TIMEOUT == 80:
            time.sleep(BUZZ_TIMEOUT)
            BUZZ_TIMEOUT = 60


def update_img():
    global FUNC_CALL
    FUNC_CALL += 1

    if FUNC_CALL % 2== 0:
        return CHAR_TALKING_HTML
    else:
        return CHAR_THINKING_HTML


def get_response(history, audio_input):

    query_type = 'text'
    question =history[-1][0]
    conv_history_tokens = 0

    global BUZZ_TIMEOUT
    BUZZ_TIMEOUT = 80

    if not question:
        if audio_input:
            query_type = 'audio'
            os.rename(audio_input, audio_input + '.wav')
            audio_file = open(audio_input + '.wav', "rb")
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
            question = transcript['text']
        else:
            return None, None

    LOGGER.info("\nquery_type: %s", query_type)
    LOGGER.info("query_text: %s", question)
    print('\nquery_type:', query_type)
    print('\nquery_text:', question)

    if question.lower().strip() == 'hi':
        question = 'hello'
    
    answer = conv_model.run(question)
    LOGGER.info("\ndocument_response: %s", answer)
    print('\ndocument_response:', answer)

    conv_history_tokens = num_tokens_from_messages(MESSAGES)
    print("conv_history_tokens: ", conv_history_tokens)
    print("MESSAGES", MESSAGES)
    while (conv_history_tokens + max_response_tokens >= token_limit):
        del MESSAGES[1] 
        conv_history_tokens = num_tokens_from_messages(MESSAGES)
    print("conv_history_tokens_ajust: ", conv_history_tokens)
    MESSAGES.append({"role": "user", "content": question})
    MESSAGES.append({"role": "assistant", "content": answer})            
    for trigger in GENERAL_RSPONSE_TRIGGERS:
        if trigger in answer:    
            MESSAGES.append({"role": "user", "content": question})
            chat = openai.ChatCompletion.create(
                    model="gpt-3.5-turbo-16k", 
                    messages=MESSAGES,
                    max_tokens=500,
                    temperature=0.7,
                    n=128,
                    stop="\n"
                    )
            answer = chat.choices[0].message.content
            MESSAGES.append({"role": "assistant", "content": answer})
            LOGGER.info("general_response: %s", answer)
            print('\ngeneral_response:', answer)
    AUDIO_HTML = text_to_speech_gen(answer)
    history[-1][1] = answer

    return history, AUDIO_HTML

# buzz_usr_proc = Thread(target=idle_timer)

with gr.Blocks(css = """#col_image{width:800px; height:800px; margin-left: auto; margin-right: auto;}""") as demo: 
    with gr.Row(scale=0.7):
        output_html = gr.HTML(label="Felix's Voice", value=AUDIO_HTML)
        output_html.visible = False
        image1= gr.Image("assets/NPCtest1.png").style(height=700) #elem_id = "col_image"
        #assistant_character = gr.HTML(label=None, value=CHAR_IDLE_HTML, show_label=False)
        with gr.Column(scale=0.3):
            chatbot = gr.Chatbot(label='Send a text or a voice input').style(height=285)
            with gr.Column():
                msg = gr.Textbox(placeholder='Write a chat & press Enter.', show_label=False).style(container=False)
                with gr.Column(scale=0.5):
                    audio_input = gr.Audio(source="microphone", type='filepath', show_label=False).style(container=False)
                    button = gr.Button(value="Send")

    msg.submit(get_chat_history, [msg, chatbot], [msg, chatbot]
                ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
                )

    button.click(get_chat_history, [msg, chatbot], [msg, chatbot]
                ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
                )
    
    # buzz_usr_proc.start()
    
demo.launch(debug=False, favicon_path='assets/favicon.png', show_api=False, share=False)