|
|
|
"""wiki_chat.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1P5rJeCXRSsDJw_1ksnHmodH6ng2Ot5NW |
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from azure_utils import AzureVoiceData |
|
from polly_utils import PollyVoiceData, NEURAL_ENGINE |
|
from langchain.prompts import PromptTemplate |
|
from openai.error import AuthenticationError, InvalidRequestError, RateLimitError |
|
import re |
|
import sys |
|
from io import StringIO |
|
from threading import Lock |
|
from langchain.llms import OpenAI |
|
from langchain.chains.conversation.memory import ConversationBufferMemory |
|
from langchain.agents import tool, load_tools, initialize_agent |
|
from langchain import ConversationChain, LLMChain |
|
import whisper |
|
import warnings |
|
import boto3 |
|
import datetime |
|
from typing import Optional, Tuple |
|
from contextlib import closing |
|
|
|
import io |
|
import requests |
|
import os |
|
import gradio as gr |
|
from sentence_transformers import SentenceTransformer, CrossEncoder, util |
|
from torch import tensor as torch_tensor |
|
from datasets import load_dataset |
|
|
|
from greg_funcs import mrkl_rspnd |
|
|
|
|
|
# --- Credentials and environment configuration -------------------------------
# Secrets are read from the process environment; nothing is hard-coded here.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")

# Region exported for the AWS client created in do_html_audio_speak().
aws_region_name = "us-east-1"
os.environ["AWS_DEFAULT_REGION"] = aws_region_name

# Optional third-party keys. setdefault() guarantees the variables exist
# (later code indexes os.environ directly) without wiping a value that the
# deployment has already configured, which a plain assignment of '' would do.
os.environ.setdefault('EXHUMAN_API_KEY', '')
os.environ.setdefault("NEWS_API_KEY", '')
os.environ.setdefault("TMDB_BEARER_TOKEN", '')

news_api_key = os.environ["NEWS_API_KEY"]
tmdb_bearer_token = os.environ["TMDB_BEARER_TOKEN"]
|
|
|
# Tool names offered in the "Tools:" checkbox group on the Settings tab.
TOOLS_LIST = ['serpapi', 'wolfram-alpha', 'pal-math', 'pal-colored-objects', 'news-api', 'tmdb-api',
              'open-meteo-api']
# Tools pre-selected in that checkbox group and in its backing state.
TOOLS_DEFAULT_LIST = ['serpapi']
# Prefix of the message run_chain() produces for unexpected exceptions.
BUG_FOUND_MSG = "Congratulations, you've found a bug in this application!"
# Message run_chain() produces when the OpenAI key is rejected.
AUTH_ERR_MSG = "Please paste your OpenAI key from openai.com to use this application. It is not necessary to hit a button or key after pasting it."
# NOTE(review): MAX_TOKENS and TEMPERATURE are not referenced in the visible
# part of this file - presumably LLM settings; confirm before removing.
MAX_TOKENS = 512
TEMPERATURE = 0

# Talking-head video: looping idle clip, square size of the <video> element,
# and the longest reply (in characters) for which a lip-synced clip is requested.
LOOPING_TALKING_HEAD = "videos/humancare.mp4"
TALKING_HEAD_WIDTH = "192"
MAX_TALKING_HEAD_TEXT_LENGTH = 155

# Defaults for the "restate" controls; "N/A" means "do not constrain".
NUM_WORDS_DEFAULT = 0
MAX_WORDS = 400
FORMALITY_DEFAULT = "N/A"
TEMPERATURE_DEFAULT = 0.5
EMOTION_DEFAULT = "N/A"
LANG_LEVEL_DEFAULT = "N/A"
TRANSLATE_TO_DEFAULT = "N/A"
LITERARY_STYLE_DEFAULT = "N/A"
# Prompt assembled by transform_text(); every fragment except original_words is
# either empty or a phrase ending in ", " so the pieces concatenate cleanly.
PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["original_words", "num_words", "formality",
                     "emotions", "lang_level", "translate_to", "literary_style"],
    template="Restate {num_words}{formality}{emotions}{lang_level}{translate_to}{literary_style}the following: \n{original_words}\n",
)

# Voice lookup tables (project helpers) and the gender passed to the Polly lookup.
POLLY_VOICE_DATA = PollyVoiceData()
AZURE_VOICE_DATA = AzureVoiceData()
VOICE_GENDER = 'Female'

# Sentinel radio choice meaning "do not force a language" in transcribe().
WHISPER_DETECT_LANG = "Detect language"

# Silences ALL warnings process-wide from this point on.
warnings.filterwarnings("ignore")
# The "tiny" Whisper model is loaded once, at import time.
WHISPER_MODEL = whisper.load_model("tiny")
print("WHISPER_MODEL", WHISPER_MODEL)

# Page-level CSS handed to gr.Blocks.
CSS = ".gradio-container {background-color: lightgray}"

# Placeholder text of the chat input box.
PLACEHOLDER = "How much is the monthly premium?"

# Clickable example questions shown under the chat input.
EXAMPLES = ["What is the name of my plan?",
            "How much is the monthly premium?",
            "Is prostate cancer screening supported by my plan?",
            "Have I spent enough on drug expenses for catastrophic coverage to kick in?"]
# Credits rendered as HTML at the bottom of the page.
AUTHORS = """
<p>This application, developed by <b>Greg Hayworth, Srikanth Tangelloju, Lincoln Snyder, Michal Piekarczyk, and Xingde Jiang</b>,
demonstrates a conversational agent implemented with OpenAI GPT-3.5 and LangChain.
For faster inference without waiting in queue, you may duplicate the space.
</p>"""
|
|
|
|
|
|
|
def transcribe(aud_inp, whisper_lang):
    """Transcribe a recorded audio file with the module-level Whisper model.

    Args:
        aud_inp: Path of the audio file to transcribe, or ``None`` when
            nothing was recorded.
        whisper_lang: Language name chosen in the UI. When it equals
            ``WHISPER_DETECT_LANG`` no language is forced on the decoder.

    Returns:
        The recognised text, or ``""`` when there is no audio or no text.
    """
    if aud_inp is None:
        return ""

    aud = whisper.pad_or_trim(whisper.load_audio(aud_inp))
    mel = whisper.log_mel_spectrogram(aud).to(WHISPER_MODEL.device)

    # No separate detect_language() pass: its result was never used, so it
    # only cost an extra run of the model per recording.
    decode_kwargs = {"fp16": False}
    if whisper_lang != WHISPER_DETECT_LANG:
        decode_kwargs["language"] = POLLY_VOICE_DATA.get_whisper_lang_code(
            whisper_lang)
    options = whisper.DecodingOptions(**decode_kwargs)

    result = whisper.decode(WHISPER_MODEL, mel, options)

    # Check the result before touching .text so a missing result cannot raise.
    result_text = result.text if result and result.text else ""
    print("result.text", result_text)
    return result_text
|
|
|
|
|
|
|
# Prompt fragment for each supported literary style; any other value
# (including the "N/A" default) contributes nothing to the prompt.
_LITERARY_STYLE_PHRASES = {
    "Prose": "as prose, ",
    "Summary": "as a summary, ",
    "Outline": "as an outline numbers and lower case letters, ",
    "Bullets": "as bullet points using bullets, ",
    "Poetry": "as a poem, ",
    "Haiku": "as a haiku, ",
    "Limerick": "as a limerick, ",
    "Joke": "as a very funny joke with a setup and punchline, ",
    "Knock-knock": "as a very funny knock-knock joke, ",
}


def transform_text(desc, express_chain, num_words, formality,
                   anticipation_level, joy_level, trust_level,
                   fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                   lang_level, translate_to, literary_style):
    """Restate ``desc`` according to the selected style controls.

    Each control contributes a phrase ending in ", " (or nothing) to the
    "Restate ... the following" prompt defined by ``PROMPT_TEMPLATE``.

    Args:
        desc: Text to restate.
        express_chain: Object whose ``run(dict)`` returns the restated text;
            when falsy the text is returned untransformed.
        num_words: Maximum word count; falsy or 0 means no limit.
        formality: Formality label, or "N/A" (case-insensitive) for none.
        anticipation_level, joy_level, trust_level, fear_level, surprise_level,
        sadness_level, disgust_level, anger_level: Emotion labels, each
            "N/A" (case-insensitive) when unused.
        lang_level: Reading level, or ``LANG_LEVEL_DEFAULT`` for none.
        translate_to: Target language/dialect, or ``TRANSLATE_TO_DEFAULT``.
        literary_style: One of the keys of ``_LITERARY_STYLE_PHRASES``;
            anything else adds no instruction.

    Returns:
        The restated text (or ``desc`` itself when nothing was requested or no
        chain is available), with every newline doubled.
    """
    num_words_prompt = ""
    if num_words and int(num_words) != 0:
        num_words_prompt = "using up to " + str(num_words) + " words, "

    formality = formality.lower()
    formality_str = ""
    if formality != "n/a":
        formality_str = "in a " + formality + " manner, "

    # Selected emotions, kept in the fixed order of the parameters.
    lowered_levels = [
        level.lower()
        for level in (anticipation_level, joy_level, trust_level, fear_level,
                      surprise_level, sadness_level, disgust_level, anger_level)
    ]
    emotions = [level for level in lowered_levels if level != "n/a"]

    emotions_str = ""
    if len(emotions) == 1:
        emotions_str = "with emotion of " + emotions[0] + ", "
    elif emotions:
        emotions_str = "with emotions of " + \
            ", ".join(emotions[:-1]) + " and " + emotions[-1] + ", "

    # When translating, the reading level is folded into the translation
    # phrase below instead of getting its own "at a ... level" phrase.
    lang_level_str = ""
    if lang_level != LANG_LEVEL_DEFAULT and translate_to == TRANSLATE_TO_DEFAULT:
        lang_level_str = "at a " + lang_level + " level, "

    translate_to_str = ""
    if translate_to != TRANSLATE_TO_DEFAULT:
        # Compare against the language-level sentinel (the original used the
        # translate-to sentinel, which only worked because both are "N/A").
        level_part = "" if lang_level == LANG_LEVEL_DEFAULT else lang_level + " level "
        translate_to_str = "translated to " + level_part + translate_to + ", "

    literary_style_str = _LITERARY_STYLE_PHRASES.get(literary_style, "")

    # One mapping feeds both the logged prompt and the chain call, so the two
    # can never drift apart.
    prompt_vars = {
        'original_words': desc,
        'num_words': num_words_prompt,
        'formality': formality_str,
        'emotions': emotions_str,
        'lang_level': lang_level_str,
        'translate_to': translate_to_str,
        'literary_style': literary_style_str,
    }
    formatted_prompt = PROMPT_TEMPLATE.format(**prompt_vars)

    trans_instr = num_words_prompt + formality_str + emotions_str + \
        lang_level_str + translate_to_str + literary_style_str
    if express_chain and trans_instr.strip():
        generated_text = express_chain.run(prompt_vars).strip()
    else:
        print("Not transforming text")
        generated_text = desc

    # Double every newline in the returned text.
    generated_text = generated_text.replace("\n", "\n\n")

    prompt_plus_generated = "GPT prompt: " + \
        formatted_prompt + "\n\n" + generated_text

    # NOTE(review): the logged timestamp is shifted back five hours -
    # presumably a fixed local-time offset; confirm.
    print("\n==== date/time: " + str(datetime.datetime.now() -
                                     datetime.timedelta(hours=5)) + " ====")
    print("prompt_plus_generated: " + prompt_plus_generated)

    return generated_text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _run_chain_guarded(chain, inp):
    """Run ``chain`` on ``inp``; return ``(output, error_msg)``.

    On success ``error_msg`` is ``None``; on failure ``output`` is ``""`` and
    ``error_msg`` holds the user-facing description of the exception.
    """
    try:
        return chain.run(input=inp), None
    except AuthenticationError:
        return "", AUTH_ERR_MSG
    except RateLimitError as rle:
        return "", "\n\nRateLimitError: " + str(rle)
    except ValueError as ve:
        return "", "\n\nValueError: " + str(ve)
    except InvalidRequestError as ire:
        return "", "\n\nInvalidRequestError: " + str(ire)
    except Exception as e:
        return "", "\n\n" + BUG_FOUND_MSG + ":\n\n" + str(e)


def run_chain(chain, inp, capture_hidden_text):
    """Run ``chain`` on ``inp``, optionally capturing what it prints.

    Args:
        chain: Object exposing ``run(input=...)``.
        inp: User input passed to the chain.
        capture_hidden_text: When true, everything the chain writes to stdout
            is captured, cleaned up and returned as the second element.

    Returns:
        ``(output, hidden_text)``. Without capture, ``hidden_text`` is ``None``
        and a failure message replaces ``output``. With capture, ``output`` is
        ``""`` on failure and the failure message is appended to
        ``hidden_text``.
    """
    if not capture_hidden_text:
        output, error_msg = _run_chain_guarded(chain, inp)
        return (error_msg if error_msg is not None else output), None

    hidden_text_io = StringIO()
    saved_stdout = sys.stdout
    sys.stdout = hidden_text_io
    try:
        output, error_msg = _run_chain_guarded(chain, inp)
    finally:
        # Always restore stdout, even if something the helper does not handle
        # (e.g. KeyboardInterrupt) propagates out of the chain.
        sys.stdout = saved_stdout

    hidden_text = hidden_text_io.getvalue()

    # Remove ANSI escape sequences.
    hidden_text = re.sub(r'\x1b[^m]*m', '', hidden_text)

    # Remove the executor's start/finish banners.
    hidden_text = re.sub(
        r"Entering new AgentExecutor chain...\n", "", hidden_text)
    hidden_text = re.sub(r"Finished chain.", "", hidden_text)

    # Put each step label on its own paragraph.
    for label in ("Thought:", "Action:", "Observation:", "Input:", "AI:"):
        hidden_text = hidden_text.replace(label, "\n\n" + label)

    if error_msg:
        hidden_text += error_msg

    print("hidden_text: ", hidden_text)
    return output, hidden_text
|
|
|
|
|
class ChatWrapper:
    """Callable chat handler; a lock lets only one request run at a time."""

    def __init__(self):
        self.lock = Lock()

    def __call__(
            self, inp: str, history: Optional[Tuple[str, str]], chain: Optional[ConversationChain],
            trace_chain: bool, speak_text: bool, talking_head: bool, monologue: bool, express_chain: Optional[LLMChain],
            num_words, formality, anticipation_level, joy_level, trust_level,
            fear_level, surprise_level, sadness_level, disgust_level, anger_level,
            lang_level, translate_to, literary_style
    ):
        """Answer ``inp``, restyle the answer and optionally produce media.

        The answer comes from ``mrkl_rspnd``; ``chain`` and ``monologue`` are
        accepted for interface compatibility but are not used. The talking
        head is forced off regardless of ``talking_head``.

        Args:
            inp: The user's message.
            history: Previous ``(user, bot)`` pairs, or ``None`` to start fresh.
            chain: Unused.
            trace_chain: Whether captured reasoning text should precede the
                answer in the chat bubble.
            speak_text: Whether to synthesize audio for the answer.
            talking_head: Ignored (overridden to ``False``).
            monologue: Unused.
            express_chain: Chain passed to ``transform_text``.
            num_words .. literary_style: Style controls forwarded unchanged to
                ``transform_text``.

        Returns:
            ``(history, history, html_video, temp_file, html_audio,
            temp_aud_file, "")``.
        """
        # "with" releases the lock on every exit path, including exceptions.
        with self.lock:
            print("\n==== date/time: " + str(datetime.datetime.now()) + " ====")
            print("inp: " + inp)
            print("trace_chain: ", trace_chain)
            print("speak_text: ", speak_text)
            print("talking_head: ", talking_head)
            talking_head = False  # talking head is switched off unconditionally
            print("monologue: ", monologue)
            history = history or []

            # No reasoning trace is produced on this path. It used to be
            # seeded with AUTH_ERR_MSG, which then showed up in front of every
            # answer whenever trace_chain was on.
            hidden_text = None

            response = mrkl_rspnd(inp)
            output = response['output']

            print("original output", output)
            print("using these knobs:",
                  (
                      formality, anticipation_level, joy_level,
                      trust_level,
                      fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                      lang_level, translate_to, literary_style
                  )
                  )

            output = transform_text(output, express_chain, num_words, formality, anticipation_level, joy_level,
                                    trust_level,
                                    fear_level, surprise_level, sadness_level, disgust_level, anger_level,
                                    lang_level, translate_to, literary_style)

            print("transformed output", output)

            text_to_display = output
            if trace_chain and hidden_text:
                text_to_display = hidden_text + "\n\n" + output
            history.append((inp, text_to_display))

            html_video, temp_file, html_audio, temp_aud_file = None, None, None, None
            if speak_text:
                if talking_head:
                    if len(output) <= MAX_TALKING_HEAD_TEXT_LENGTH:
                        html_video, temp_file = do_html_video_speak(
                            output, translate_to)
                    else:
                        # Too long for a lip-synced clip: loop the idle video
                        # and speak the text as plain audio instead.
                        temp_file = LOOPING_TALKING_HEAD
                        html_video = create_html_video(
                            temp_file, TALKING_HEAD_WIDTH)
                        html_audio, temp_aud_file = do_html_audio_speak(
                            output, translate_to)
                else:
                    html_audio, temp_aud_file = do_html_audio_speak(
                        output, translate_to)
            elif talking_head:
                temp_file = LOOPING_TALKING_HEAD
                html_video = create_html_video(
                    temp_file, TALKING_HEAD_WIDTH)

        return history, history, html_video, temp_file, html_audio, temp_aud_file, ""


chat = ChatWrapper()
|
|
|
|
|
def do_html_audio_speak(words_to_speak, polly_language):
    """Synthesize ``words_to_speak`` via the 'polly' client and save it as mp3.

    Args:
        words_to_speak: Text to synthesize.
        polly_language: Language name used to look up a voice; when no voice
            is found, "Joanna" / "en-US" with the neural engine is used.

    Returns:
        ``(html_audio, "audios/tempfile.mp3")`` on success, where ``html_audio``
        is an autoplaying ``<audio>`` tag; ``(None, None)`` when the response
        has no audio stream or the file cannot be written.
    """
    print(f"words_to_speak: {words_to_speak}")
    print(f"polly_language: {polly_language}")
    print(f"VOICE_GENDER: {VOICE_GENDER}")

    session = boto3.Session(
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
        region_name=os.environ["AWS_DEFAULT_REGION"],
    )
    polly_client = session.client('polly')

    voice_id, language_code, engine = POLLY_VOICE_DATA.get_voice(
        polly_language, VOICE_GENDER)
    if not voice_id:
        # Fallback voice when the lookup yields nothing.
        voice_id, language_code, engine = "Joanna", "en-US", NEURAL_ENGINE

    response = polly_client.synthesize_speech(
        Text=words_to_speak,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language_code,
        Engine=engine,
    )

    separator = '-' * 10
    print(separator)
    print(f'response: {response}')
    print(separator)

    if "AudioStream" not in response:
        print("Could not stream audio")
        return None, None

    with closing(response["AudioStream"]) as stream:
        try:
            with open('audios/tempfile.mp3', 'wb') as mp3_out:
                mp3_out.write(stream.read())
            audio_component = gr.File("audios/tempfile.mp3")
            audio_url = "/file=" + audio_component.value['name']
            html_audio = f'<audio autoplay><source src={audio_url} type="audio/mp3"></audio>'
        except IOError as error:
            print(error)
            return None, None

    return html_audio, "audios/tempfile.mp3"
|
|
|
|
|
def create_html_video(file_name, width):
    """Return a square, muted, looping, autoplaying ``<video>`` tag.

    Args:
        file_name: Not used by the body.
            NOTE(review): the source URL always comes from the module-level
            ``tmp_vid_file`` component - confirm that is intended.
        width: Used for both the width and the height attribute.

    Returns:
        The HTML string for the video element.
    """
    source_url = "/file=" + tmp_vid_file.value['name']
    return f'<video width={width} height={width} autoplay muted loop><source src={source_url} type="video/mp4" poster="humancare.jpg"></video>'
|
|
|
|
|
def do_html_video_speak(words_to_speak, azure_language):
    """Request a lip-synced clip for ``words_to_speak`` and save it as mp4.

    Args:
        words_to_speak: Text the talking head should say.
        azure_language: Language name used to look up a male voice; falls
            back to "en-US-ChristopherNeural" when none is found.

    Returns:
        ``(html_video, "videos/tempfile.mp4")``. ``html_video`` is an
        autoplaying ``<video>`` tag when the request succeeded with a
        non-empty body, otherwise ``'<pre>no video</pre>'``.
    """
    azure_voice = AZURE_VOICE_DATA.get_voice(azure_language, "Male")
    if not azure_voice:
        azure_voice = "en-US-ChristopherNeural"

    headers = {"Authorization": f"Bearer {os.environ['EXHUMAN_API_KEY']}"}
    body = {
        'bot_name': 'humancare',
        'bot_response': words_to_speak,
        'azure_voice': azure_voice,
        'azure_style': 'friendly',
        'animation_pipeline': 'high_speed',
    }
    api_endpoint = "https://api.exh.ai/animations/v1/generate_lipsync"
    res = requests.post(api_endpoint, json=body, headers=headers)
    print("res.status_code: ", res.status_code)

    html_video = '<pre>no video</pre>'
    # res.content is always bytes, so an isinstance() check cannot tell a clip
    # from an error payload. Only a successful, non-empty response is written
    # out as video.
    if res.ok and res.content:
        print("len(res.content)): ", len(res.content))

        with open('videos/tempfile.mp4', 'wb') as f:
            f.write(res.content)
        temp_file = gr.File("videos/tempfile.mp4")
        temp_file_url = "/file=" + temp_file.value['name']
        html_video = f'<video width={TALKING_HEAD_WIDTH} height={TALKING_HEAD_WIDTH} autoplay><source src={temp_file_url} type="video/mp4" poster="humancare.jpg"></video>'
    else:
        print('video url unknown')
    return html_video, "videos/tempfile.mp4"
|
|
|
|
|
def update_selected_tools(widget, state, llm):
    """Rebuild the chains for the current tool selection.

    Args:
        widget: Value of the tools checkbox group; when falsy the previous
            ``state`` is used instead.
        state: Previously stored tool selection.
        llm: Passed through to ``load_chain`` and returned unchanged.

    Returns:
        ``(selection, llm, chain, express_chain)``.

    NOTE(review): ``load_chain`` is neither defined nor imported in the
    visible part of this file - confirm it exists, otherwise this raises
    NameError when called.
    """
    selection = widget if widget else state
    chain, express_chain = load_chain(selection, llm)
    return selection, llm, chain, express_chain
|
|
|
|
|
def update_talking_head(widget, state):
    """Return the new talking-head state and the HTML to show for it.

    Args:
        widget: Value of the "Show talking head" checkbox.
        state: Previous state value; not consulted.

    Returns:
        ``(widget, <looping video html>)`` when ``widget`` is truthy,
        otherwise ``(None, "<pre></pre>")``.
    """
    if not widget:
        return None, "<pre></pre>"

    looping_video = create_html_video(
        LOOPING_TALKING_HEAD, TALKING_HEAD_WIDTH)
    return widget, looping_video
|
|
|
|
|
def update_foo(widget, state):
    """Return the value a state should take after its widget changed.

    Args:
        widget: The widget's new value.
        state: The previously stored value.

    Returns:
        ``widget``, unless it is ``None``, in which case ``state`` is kept.
        Falsy values such as ``False`` (unchecked box) or ``0`` (slider at
        zero) are real values and are stored; a truthiness test would ignore
        them, so a setting could never be switched back off.
    """
    if widget is None:
        return state
    return widget
|
|
|
|
|
# Languages offered both for Whisper speech-to-text and as translation targets.
_SPOKEN_LANGUAGES = [
    "Arabic", "Arabic (Gulf)", "Catalan", "Chinese (Cantonese)", "Chinese (Mandarin)",
    "Danish", "Dutch", "English (Australian)", "English (British)", "English (Indian)", "English (New Zealand)",
    "English (South African)", "English (US)", "English (Welsh)", "Finnish", "French", "French (Canadian)",
    "German", "German (Austrian)", "Georgian", "Hindi", "Icelandic", "Indonesian", "Italian", "Japanese",
    "Korean", "Norwegian", "Polish",
    "Portuguese (Brazilian)", "Portuguese (European)", "Romanian", "Russian", "Spanish (European)",
    "Spanish (Mexican)", "Spanish (US)", "Swedish", "Turkish", "Ukrainian", "Welsh",
]

# Build the Gradio UI: state holders first, then the tabs, then the event
# wiring that connects the chat input to the module-level `chat` handler.
with gr.Blocks(css=CSS) as block:
    llm_state = gr.State()
    history_state = gr.State()
    chain_state = gr.State()
    express_chain_state = gr.State()
    tools_list_state = gr.State(TOOLS_DEFAULT_LIST)
    trace_chain_state = gr.State(False)
    speak_text_state = gr.State(False)
    talking_head_state = gr.State(True)

    monologue_state = gr.State(False)

    # Style controls forwarded to transform_text(); only some of them have a
    # visible widget below, the rest keep their defaults.
    num_words_state = gr.State(NUM_WORDS_DEFAULT)
    formality_state = gr.State(FORMALITY_DEFAULT)
    anticipation_level_state = gr.State(EMOTION_DEFAULT)
    joy_level_state = gr.State(EMOTION_DEFAULT)
    trust_level_state = gr.State(EMOTION_DEFAULT)
    fear_level_state = gr.State(EMOTION_DEFAULT)
    surprise_level_state = gr.State(EMOTION_DEFAULT)
    sadness_level_state = gr.State(EMOTION_DEFAULT)
    disgust_level_state = gr.State(EMOTION_DEFAULT)
    anger_level_state = gr.State(EMOTION_DEFAULT)
    lang_level_state = gr.State(LANG_LEVEL_DEFAULT)
    translate_to_state = gr.State(TRANSLATE_TO_DEFAULT)
    literary_style_state = gr.State(LITERARY_STYLE_DEFAULT)

    whisper_lang_state = gr.State(WHISPER_DETECT_LANG)

    with gr.Tab("Chat"):
        with gr.Row():
            with gr.Column():
                gr.Markdown("""# NLP QA Chat Demo""")

        with gr.Row():
            with gr.Column(scale=1, min_width=TALKING_HEAD_WIDTH, visible=True):
                # Initial value must match speak_text_state above. The box
                # used to start checked while the state said "off", so the UI
                # claimed speech was enabled when it was not.
                speak_text_cb = gr.Checkbox(label="Enable speech", value=False)
                speak_text_cb.change(update_foo, inputs=[speak_text_cb, speak_text_state],
                                     outputs=[speak_text_state])

                my_file = gr.File(label="Upload a file",
                                  type="file", visible=False)
                # Read by create_html_video() to build the video URL.
                tmp_vid_file = gr.File(LOOPING_TALKING_HEAD, visible=False)

                video_html = gr.HTML("<pre></pre>")

                tmp_aud_file = gr.File("audios/tempfile.mp3", visible=False)
                tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
                htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'

                audio_html = gr.HTML(htm_audio)

            with gr.Column(scale=7):
                chatbot = gr.Chatbot()

        with gr.Row():
            message = gr.Textbox(label="What's on your mind??",
                                 placeholder=PLACEHOLDER,
                                 lines=1)
            submit = gr.Button(value="Send", variant="secondary").style(
                full_width=False)

        with gr.Row():
            audio_comp = gr.Microphone(source="microphone", type="filepath", label="Just say it!",
                                       interactive=True, streaming=False)
            # A finished recording is transcribed straight into the text box.
            audio_comp.change(transcribe, inputs=[
                audio_comp, whisper_lang_state], outputs=[message])

        gr.Examples(
            examples=EXAMPLES,
            inputs=message
        )

    with gr.Tab("Settings"):
        tools_cb_group = gr.CheckboxGroup(label="Tools:", choices=TOOLS_LIST,
                                          value=TOOLS_DEFAULT_LIST)
        tools_cb_group.change(update_selected_tools,
                              inputs=[tools_cb_group,
                                      tools_list_state, llm_state],
                              outputs=[tools_list_state, llm_state, chain_state, express_chain_state])

        trace_chain_cb = gr.Checkbox(
            label="Show reasoning chain in chat bubble", value=False)
        trace_chain_cb.change(update_foo, inputs=[trace_chain_cb, trace_chain_state],
                              outputs=[trace_chain_state])

        talking_head_cb = gr.Checkbox(label="Show talking head", value=False)
        talking_head_cb.change(update_talking_head, inputs=[talking_head_cb, talking_head_state],
                               outputs=[talking_head_state, video_html])

        monologue_cb = gr.Checkbox(label="Babel fish mode (translate/restate what you enter, no conversational agent)",
                                   value=False)
        monologue_cb.change(update_foo, inputs=[monologue_cb, monologue_state],
                            outputs=[monologue_state])

    with gr.Tab("Whisper STT"):
        whisper_lang_radio = gr.Radio(label="Whisper speech-to-text language:",
                                      choices=[WHISPER_DETECT_LANG] + _SPOKEN_LANGUAGES,
                                      value=WHISPER_DETECT_LANG)

        whisper_lang_radio.change(update_foo,
                                  inputs=[whisper_lang_radio,
                                          whisper_lang_state],
                                  outputs=[whisper_lang_state])

    with gr.Tab("Translate to"):
        lang_level_radio = gr.Radio(label="Language level:", choices=[
            LANG_LEVEL_DEFAULT, "1st grade", "2nd grade", "3rd grade", "4th grade", "5th grade", "6th grade",
            "7th grade", "8th grade", "9th grade", "10th grade", "11th grade", "12th grade", "University"],
            value=LANG_LEVEL_DEFAULT)
        lang_level_radio.change(update_foo, inputs=[lang_level_radio, lang_level_state],
                                outputs=[lang_level_state])

        translate_to_radio = gr.Radio(label="Language:",
                                      choices=[TRANSLATE_TO_DEFAULT] + _SPOKEN_LANGUAGES + [
                                          "emojis", "Gen Z slang", "how the stereotypical Karen would say it", "Klingon",
                                          "Pirate", "Strange Planet expospeak technical talk", "Yoda"],
                                      value=TRANSLATE_TO_DEFAULT)

        translate_to_radio.change(update_foo,
                                  inputs=[translate_to_radio,
                                          translate_to_state],
                                  outputs=[translate_to_state])

    with gr.Tab("Max words"):
        num_words_slider = gr.Slider(label="Max number of words to generate (0 for don't care)",
                                     value=NUM_WORDS_DEFAULT, minimum=0, maximum=MAX_WORDS, step=10)
        num_words_slider.change(update_foo,
                                inputs=[num_words_slider, num_words_state],
                                outputs=[num_words_state])

    gr.HTML(AUTHORS)

    gr.HTML("""<center>
<a href="https://huggingface.co/spaces/gfhayworth/chat_qa_demo?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
Powered by <a href='https://github.com/hwchase17/langchain'>LangChain 🦜️🔗</a>
</center>""")

    # Pressing Enter and clicking "Send" call the same handler with the same
    # arguments; the lists are defined once so they cannot drift apart. The
    # order must match the parameters of ChatWrapper.__call__ and the tuple
    # it returns.
    chat_inputs = [message, history_state, chain_state, trace_chain_state,
                   speak_text_state, talking_head_state, monologue_state,
                   express_chain_state, num_words_state, formality_state,
                   anticipation_level_state, joy_level_state, trust_level_state, fear_level_state,
                   surprise_level_state, sadness_level_state, disgust_level_state, anger_level_state,
                   lang_level_state, translate_to_state, literary_style_state]
    chat_outputs = [chatbot, history_state, video_html,
                    my_file, audio_html, tmp_aud_file, message]

    message.submit(chat, inputs=chat_inputs, outputs=chat_outputs)

    submit.click(chat, inputs=chat_inputs, outputs=chat_outputs)

block.launch(debug=True)
|
|