Spaces:
Runtime error
Runtime error
File size: 5,586 Bytes
1cc07e3 ba55863 1cc07e3 c35feec 1cc07e3 ba55863 1cc07e3 ba55863 1cc07e3 6eb8c22 ba55863 6eb8c22 ba55863 1cc07e3 ba55863 1cc07e3 ba55863 1cc07e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
import os
import nltk
import openai
import time
import gradio as gr
from threading import Thread #线程 用于定时器
from assets.char_poses_base64 import ( #角色动作
CHAR_IDLE_HTML, CHAR_THINKING_HTML, CHAR_TALKING_HTML)
from app_utils import (
get_chat_history, initialize_knowledge_base,
text_to_speech_gen, logging, buzz_user)
# --- Conversation / token-budget configuration ---
# FIX: the original used `global` statements at module scope, which are
# no-ops in Python (module level IS the global scope) — removed.
max_response_tokens = 500   # cap on tokens per model reply
token_limit = 15000         # history budget before old messages are trimmed

FUNC_CALL = 0      # parity counter driving the character pose (see update_img)
BUZZ_TIMEOUT = 60  # seconds of idle time before buzzing the user (see idle_timer)

# Answers containing any of these phrases are treated as "the document model
# had no useful answer" and re-routed to a general ChatCompletion call.
GENERAL_RSPONSE_TRIGGERS = ["I don't understand the question.", "I don't know", "Hello, my name is", "mentioned in the context provided"]

# Running chat history sent to the chat model; index 0 is the system prompt
# (Chinese: "You are an excellent exhibition-hall guide ...") and must be kept.
MESSAGES = [{"role": "system", "content": "你现在是一个优秀的展览馆讲解员,你可以通过文字或语音与客户交流,你可以讲述上海老建筑和历史人物之间的关系。"}]

LOGGER = logging.getLogger('voice_agent')  # application logger
AUDIO_HTML = ''  # initial (empty) audio-player HTML for the UI

# Uncomment if this is your first run:
nltk.download('averaged_perceptron_tagger')  # corpus used by downstream NLP helpers

# Initialize the retrieval (conversation) model and the voice model.
conv_model, voice_model = initialize_knowledge_base()
def num_tokens_from_messages(messages, model="gpt-3.5-turbo-16k"):
    """Estimate how many prompt tokens a chat-completion request will use.

    Follows OpenAI's cookbook recipe for gpt-3.5-turbo-style models: every
    message carries a fixed 4-token framing overhead
    (``<im_start>{role/name}\\n{content}<im_end>\\n``), a ``name`` key
    replaces the role token (net -1), and the reply is primed with
    ``<im_start>assistant`` (+2).

    Args:
        messages: list of chat-message dicts with string values
            (``role``/``name``/``content``).
        model: model name used to select the tiktoken encoding.

    Returns:
        int: estimated prompt token count.
    """
    # BUG FIX: `tiktoken` was referenced here but never imported anywhere in
    # the file, so the first call raised NameError. Import lazily so the
    # module still loads in environments without tiktoken installed.
    import tiktoken

    encoding = tiktoken.encoding_for_model(model)
    num_tokens = 0
    for message in messages:
        num_tokens += 4  # per-message framing overhead
        for key, value in message.items():
            num_tokens += len(encoding.encode(value))
            if key == "name":  # a name replaces the role token
                num_tokens += -1
    num_tokens += 2  # reply is primed with <im_start>assistant
    return num_tokens
def idle_timer():
    """Background loop that nudges an idle user every BUZZ_TIMEOUT seconds.

    ``get_response`` raises BUZZ_TIMEOUT to 80 while a reply is being
    generated; when that is observed here, one extra grace period is slept
    and the timeout is restored to its 60-second default. Runs forever —
    intended to be started on a separate Thread.
    """
    global BUZZ_TIMEOUT
    while True:
        time.sleep(BUZZ_TIMEOUT)
        buzz_user()
        # A reply was recently generated: allow one extended grace period,
        # then fall back to the normal cadence.
        if BUZZ_TIMEOUT == 80:
            time.sleep(BUZZ_TIMEOUT)
            BUZZ_TIMEOUT = 60
def update_img():
    """Advance the character animation and return the matching pose HTML.

    Uses the module-level FUNC_CALL parity counter: after incrementing,
    even counts map to the "talking" pose and odd counts to "thinking".
    """
    global FUNC_CALL
    FUNC_CALL += 1
    now_talking = (FUNC_CALL % 2 == 0)
    return CHAR_TALKING_HTML if now_talking else CHAR_THINKING_HTML
def get_response(history, audio_input):
    """Produce the assistant's reply for the latest chat turn.

    Args:
        history: Gradio chatbot history — list of ``[user, assistant]``
            pairs; the newest pair is ``history[-1]`` with the assistant
            slot not yet filled. (Assumes at least one pair is present,
            which Gradio guarantees after ``get_chat_history`` runs.)
        audio_input: filepath of a recorded microphone clip, or None when
            the user typed text.

    Returns:
        ``(history, audio_html)`` — history with the answer filled into the
        last pair, plus the text-to-speech audio-player HTML; or
        ``(None, None)`` when there is neither text nor audio input.
    """
    query_type = 'text'
    question = history[-1][0]
    conv_history_tokens = 0

    # Stretch the idle buzzer while a response is being generated
    # (idle_timer restores it to 60).
    global BUZZ_TIMEOUT
    BUZZ_TIMEOUT = 80

    if not question:
        if not audio_input:
            return None, None
        # Voice query: Whisper needs a recognizable extension, so rename
        # the temp file to .wav before transcribing.
        query_type = 'audio'
        os.rename(audio_input, audio_input + '.wav')
        # FIX: close the file handle deterministically (it was left open).
        with open(audio_input + '.wav', "rb") as audio_file:
            transcript = openai.Audio.transcribe("whisper-1", audio_file)
        question = transcript['text']

    LOGGER.info("\nquery_type: %s", query_type)
    LOGGER.info("query_text: %s", question)
    print('\nquery_type:', query_type)
    print('\nquery_text:', question)

    if question.lower().strip() == 'hi':
        question = 'hello'

    # First ask the document/knowledge-base model.
    answer = conv_model.run(question)
    LOGGER.info("\ndocument_response: %s", answer)
    print('\ndocument_response:', answer)

    # Trim the oldest non-system messages until the history fits the budget.
    conv_history_tokens = num_tokens_from_messages(MESSAGES)
    print("conv_history_tokens: ", conv_history_tokens)
    # FIX: guard on len(MESSAGES) > 1 — the original could IndexError (or
    # delete the system prompt) once only MESSAGES[0] remained.
    while (conv_history_tokens + max_response_tokens >= token_limit
           and len(MESSAGES) > 1):
        del MESSAGES[1]
        conv_history_tokens = num_tokens_from_messages(MESSAGES)
    print("conv_history_tokens_ajust: ", conv_history_tokens)

    # If the document model punted (trigger phrase in its answer), fall back
    # to a general chat completion over the running MESSAGES history.
    for trigger in GENERAL_RSPONSE_TRIGGERS:
        if trigger in answer:
            MESSAGES.append({"role": "user", "content": question})
            chat = openai.ChatCompletion.create(
                model="gpt-3.5-turbo-16k",
                messages=MESSAGES,
                max_tokens=500,
                temperature=0.7,
                n=1,  # FIX: was n=128 — 128 completions were generated but
                      # only choices[0] was ever read (128x token cost).
                stop="\n"
            )
            answer = chat.choices[0].message.content
            MESSAGES.append({"role": "assistant", "content": answer})
            LOGGER.info("general_response: %s", answer)
            print('\ngeneral_response:', answer)

    audio_html = text_to_speech_gen(answer)
    history[-1][1] = answer
    return history, audio_html
# NOTE(review): this section uses a legacy Gradio API (`.style()`,
# `Audio(source=...)`, `scale=` on Row) — pin the gradio version accordingly.
# Indentation/nesting below was reconstructed from a whitespace-mangled
# paste; confirm the layout against the upstream repository.

# Optional idle-buzzer background thread (disabled).
# buzz_usr_proc = Thread(target=idle_timer)

# Build the UI: an (initially hidden) audio player, the character image,
# and a chat column with textbox + microphone + send button.
with gr.Blocks(css = """#col_image{width:800px; height:800px; margin-left: auto; margin-right: auto;}""") as demo:
    with gr.Row(scale=0.7):
        # Hidden HTML element that hosts the autoplaying TTS audio tag.
        output_html = gr.HTML(label="Felix's Voice", value=AUDIO_HTML)
        output_html.visible = False
        # Static character/avatar image.
        image1= gr.Image("assets/NPCtest1.png").style(height=700) #elem_id = "col_image"
        #assistant_character = gr.HTML(label=None, value=CHAR_IDLE_HTML, show_label=False)
        with gr.Column(scale=0.3):
            chatbot = gr.Chatbot(label='Send a text or a voice input').style(height=285)
            with gr.Column():
                msg = gr.Textbox(placeholder='Write a chat & press Enter.', show_label=False).style(container=False)
                with gr.Column(scale=0.5):
                    audio_input = gr.Audio(source="microphone", type='filepath', show_label=False).style(container=False)
                    button = gr.Button(value="Send")
    # Both Enter-in-textbox and the Send button run the same two-step chain:
    # echo the user turn into the chatbot, then generate the reply + audio.
    msg.submit(get_chat_history, [msg, chatbot], [msg, chatbot]
               ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
                      )
    button.click(get_chat_history, [msg, chatbot], [msg, chatbot]
                 ).then(get_response, [chatbot, audio_input], [chatbot, output_html]
                        )

# buzz_usr_proc.start()
demo.launch(debug=False, favicon_path='assets/favicon.png', show_api=False, share=False)