# Hugging Face Space source (page status when captured: "Build error").
import json
import logging
import os

import gradio as gr
import soundfile as sf
import torch
from pyChatGPT import ChatGPT
from speechbrain.pretrained import HIFIGAN
from speechbrain.pretrained import Tacotron2
from transformers import pipeline
# Pick the first CUDA GPU when one is visible, otherwise fall back to the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f"Is CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # Only query the device name on GPU hosts: asking for the current CUDA
    # device raises when no GPU is available, which would abort start-up.
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
else:
    print("CUDA device: none (running on CPU)")
# Initialise STT (Whisper): English-only base checkpoint, run on `device`.
# chunk_length_s=30 makes the pipeline process the audio in 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device=device,
)
# Initialise ChatGPT session. The token is read from the "SessionToken"
# environment variable (a Space secret, per the instructions shown in the UI).
session_token = os.environ.get("SessionToken")
if not session_token:
    # Surface the misconfiguration early instead of failing obscurely later.
    print("Warning: the 'SessionToken' secret is not set; ChatGPT authentication will likely fail.")
api = ChatGPT(session_token=session_token)
# Initialise TTS (Tacotron2) and vocoder (HiFi-GAN), both LJSpeech checkpoints.
# The override raises Tacotron2's decoder step limit to 10000 — presumably so
# that long ChatGPT replies are not cut short (TODO confirm).
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
    overrides={"max_decoder_steps": 10000},
    run_opts={"device": device},
)
# Load the vocoder on the same device as Tacotron2 so that the mel-spectrogram
# does not have to be moved back to the CPU for waveform generation.
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
    run_opts={"device": device},
)
def get_response_from_chatbot(text, reset_conversation):
    """Send ``text`` to the ChatGPT session and return the reply text.

    Args:
        text: The user message to send.
        reset_conversation: When true, refresh the session auth and reset the
            conversation before sending the message.

    Returns:
        The ``"message"`` field of the API response, or a fixed apology string
        if the request fails for any reason.
    """
    try:
        if reset_conversation:
            api.refresh_auth()
            api.reset_conversation()
        resp = api.send_message(text)
        response = resp["message"]
    except Exception:
        # Best-effort: keep the UI responsive on any API failure, but record
        # the cause and do not swallow KeyboardInterrupt/SystemExit.
        logging.getLogger(__name__).exception("ChatGPT request failed")
        response = "Sorry, the chatGPT queue is full. Please try again later."
    return response
def chat(input_audio, chat_history, reset_conversation):
    """Run one voice turn: transcribe, query ChatGPT, synthesise the reply.

    Args:
        input_audio: Path of the recorded audio (the audio input component is
            configured with ``type="filepath"``), or ``None`` when nothing was
            recorded.
        chat_history: JSON-encoded list of ``(message, response)`` pairs from
            earlier turns; empty or ``None`` for a fresh conversation.
        reset_conversation: When true, discard the stored history and ask the
            ChatGPT session to start a new conversation.

    Returns:
        A tuple ``(pairs, chat_history, audio_path)``: the list of
        message/response pairs for the chatbot widget, the same list encoded
        as JSON, and the path of the synthesised reply (``None`` when there
        was no input audio to process).
    """
    if input_audio is None:
        # Nothing was recorded: leave the conversation untouched instead of
        # crashing inside the speech-recognition pipeline.
        unchanged = json.loads(chat_history) if chat_history else []
        return unchanged, chat_history or "", None

    # speech -> text (Whisper)
    message = pipe(input_audio)["text"]

    # text -> response (chatGPT)
    response = get_response_from_chatbot(message, reset_conversation)

    # response -> speech (tacotron2 + HiFi-GAN)
    mel_output, _, _ = tacotron2.encode_text(response)
    wav = hifi_gan.decode_batch(mel_output)
    # 22050 Hz is assumed to be the output rate of the LJSpeech checkpoints
    # loaded by this app — TODO confirm against the model card.
    sf.write("out.wav", wav.squeeze().cpu().numpy(), 22050)

    # Rebuild the running history unless a reset was requested.
    out_chat = []
    if chat_history and not reset_conversation:
        out_chat = json.loads(chat_history)
    out_chat.append((message, response))
    chat_history = json.dumps(out_chat)

    return out_chat, chat_history, "out.wav"
# JavaScript executed in the browser when the start button is clicked (it is
# passed as `_js` to the button's click handler). It hides #page_1, shows
# #page_2, sizes the chat panes to the viewport height minus 300px, and then
# polls every 500 ms to clone new message nodes from the hidden #chat_bot
# component into the visible #chat_bot1 component.
start_work = """async() => {
    function isMobile() {
        try {
            document.createEvent("TouchEvent"); return true;
        } catch(e) {
            return false;
        }
    }

    function getClientHeight()
    {
        var clientHeight=0;
        if(document.body.clientHeight&&document.documentElement.clientHeight) {
            var clientHeight = (document.body.clientHeight<document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
        } else {
            var clientHeight = (document.body.clientHeight>document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
        }
        return clientHeight;
    }

    function setNativeValue(element, value) {
        const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
        const prototype = Object.getPrototypeOf(element);
        const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;
        if (valueSetter && valueSetter !== prototypeValueSetter) {
            prototypeValueSetter.call(element, value);
        } else {
            valueSetter.call(element, value);
        }
    }

    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
    if (!gradioEl) {
        gradioEl = document.querySelector('body > gradio-app');
    }

    if (typeof window['gradioEl'] === 'undefined') {
        window['gradioEl'] = gradioEl;

        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];

        page1.style.display = "none";
        page2.style.display = "block";
        window['div_count'] = 0;
        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
        const chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
        const prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
        window['chat_bot1'].children[1].textContent = '';

        const clientHeight = getClientHeight();
        const new_height = (clientHeight-300) + 'px';
        chat_row.style.height = new_height;
        window['chat_bot'].style.height = new_height;
        window['chat_bot'].children[2].style.height = new_height;
        window['chat_bot1'].style.height = new_height;
        window['chat_bot1'].children[2].style.height = new_height;
        prompt_row.children[0].style.flex = 'auto';
        prompt_row.children[0].style.width = '100%';

        window['checkChange'] = function checkChange() {
            try {
                if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
                    const new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
                    for (var i = 0; i < new_len; i++) {
                        const new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
                        window['chat_bot1'].children[2].children[0].appendChild(new_div);
                    }
                    window['div_count'] = window['chat_bot'].children[2].children[0].children.length;
                }
                if (window['chat_bot'].children[0].children.length > 1) {
                    window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
                } else {
                    window['chat_bot1'].children[1].textContent = '';
                }
            } catch(e) {
            }
        }
        window['checkChange_interval'] = window.setInterval("window.checkChange()", 500);
    }

    return false;
}"""
# Build the UI. Page 1 holds only the start button; page 2 (initially hidden)
# holds the chat panes, the audio input/output and the submit button. The
# switch between the two pages is done client-side by the `start_work` script.
with gr.Blocks(title="Talk to chatGPT") as demo:
    gr.Markdown("## Talk to chatGPT ##")
    gr.HTML(
        "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en' class='underline'>Whisper</a> to convert the input speech"
        " to transcribed text, <a href='https://chat.openai.com/chat' class='underline'>chatGPT</a> to generate responses, and <a"
        " href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech' class='underline'>tacotron2</a> to convert the response to"
        " output speech: </p>"
    )
    gr.HTML("<p> <center><img src='https://raw.githubusercontent.com/sanchit-gandhi/codesnippets/main/pipeline.png' width='870'></center> </p>")
    gr.HTML(
        "<p>You can duplicate this space and use your own session token: <a style='display:inline-block'"
        " href='https://huggingface.co/spaces/sanchit-gandhi/chatGPT?duplicate=true'><img"
        " src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=10'"
        " alt='Duplicate Space'></a></p>"
    )
    gr.HTML(
        "<p> Instructions on how to obtain your session token can be found in the video <a style='display:inline-block'"
        " href='https://youtu.be/TdNSj_qgdFk?t=175'><font style='color:blue;weight:bold;'>here</font></a>."
        " Add your session token by going to <i>Settings</i> -> <i>New secret</i> and add the token under the name <i>SessionToken</i>. </p>"
    )

    # Landing page: a single button whose click runs only client-side JavaScript.
    with gr.Group(elem_id="page_1", visible=True) as page_1:
        with gr.Box():
            with gr.Row():
                start_button = gr.Button("Let's talk to chatGPT! 🗣", elem_id="start-btn", visible=True)
                start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)

    # Chat page: the hidden `chatbot` receives the server output; the script in
    # `start_work` copies its messages into the visible `chatbot1`.
    with gr.Group(elem_id="page_2", visible=False) as page_2:
        with gr.Row(elem_id="chat_row"):
            chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
            chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))

        with gr.Row():
            prompt_input_audio = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record Audio Input",
            )
            prompt_output_audio = gr.Audio()
            reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)

        with gr.Row(elem_id="prompt_row"):
            # Hidden textbox that carries the JSON-encoded history between calls.
            chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
            submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
                margin=True,
                rounded=(True, True, True, True),
                width=100,
            )
            submit_btn.click(
                fn=chat,
                inputs=[prompt_input_audio, chat_history, reset_conversation],
                outputs=[chatbot, chat_history, prompt_output_audio],
            )

demo.launch(debug=True)