|
import json
import os
import tempfile

import gradio as gr
import soundfile as sf
import torch
from pyChatGPT import ChatGPT
from speechbrain.pretrained import HIFIGAN
from speechbrain.pretrained import Tacotron2
from transformers import pipeline
|
|
|
|
|
# Run the models on the first GPU when CUDA is usable, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

print(f"Is CUDA available: {torch.cuda.is_available()}")
# Asking for the current CUDA device raises on CPU-only hosts, so only report
# the device name when CUDA is actually available.
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")

# Speech-to-text: Whisper (English-only base checkpoint), processing audio in
# 30-second chunks.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    chunk_length_s=30,
    device=device,
)

# ChatGPT client, authenticated with the token read from the "SessionToken"
# environment variable (None when the variable is unset).
session_token = os.environ.get("SessionToken")
api = ChatGPT(session_token=session_token)

# Text-to-speech: Tacotron2 turns text into a mel spectrogram and HiFi-GAN
# turns the spectrogram into a waveform.
# max_decoder_steps is overridden to 10000 -- presumably so long replies are
# not cut short; confirm against the model's default.
tacotron2 = Tacotron2.from_hparams(
    source="speechbrain/tts-tacotron2-ljspeech",
    savedir="tmpdir_tts",
    overrides={"max_decoder_steps": 10000},
    run_opts={"device": device},
)
# Load the vocoder on the same device as Tacotron2 so synthesis does not fall
# back to the CPU when a GPU is available.
hifi_gan = HIFIGAN.from_hparams(
    source="speechbrain/tts-hifigan-ljspeech",
    savedir="tmpdir_vocoder",
    run_opts={"device": device},
)
|
|
|
|
|
def get_response_from_chatbot(text, reset_conversation):
    """Send *text* to ChatGPT and return the reply text.

    Args:
        text: The message to send.
        reset_conversation: When truthy, refresh the client's authentication
            and reset the conversation before sending the message.

    Returns:
        The ``"message"`` field of the API response, or a fixed apology
        string when the request fails for any reason.
    """
    try:
        if reset_conversation:
            api.refresh_auth()
            api.reset_conversation()
        resp = api.send_message(text)
        return resp["message"]
    except Exception as err:
        # Deliberately best-effort: the UI should always get a reply. Catching
        # Exception (rather than a bare except) still lets KeyboardInterrupt
        # and SystemExit propagate, and the real cause is printed so failures
        # other than a full queue are not hidden.
        print(f"ChatGPT request failed: {err!r}")
        return "Sorry, the chatGPT queue is full. Please try again later."
|
|
|
|
|
def chat(input_audio, chat_history, reset_conversation):
    """Run one voice turn: transcribe, query ChatGPT, synthesise the reply.

    Args:
        input_audio: Path of the recorded audio file, or ``None`` when nothing
            was recorded.
        chat_history: JSON-encoded list of ``(message, response)`` pairs from
            earlier turns; empty (or ``None``) when there are none.
        reset_conversation: When truthy, earlier turns are discarded and the
            ChatGPT conversation is reset before the message is sent.

    Returns:
        A 3-tuple ``(turns, chat_history, audio_path)``: the list of
        ``(message, response)`` pairs, the same list JSON-encoded, and the
        path of the WAV file holding the spoken reply (``None`` when there
        was no input audio).
    """
    if input_audio is None:
        # Nothing was recorded: leave the conversation untouched instead of
        # failing inside the speech-recognition pipeline.
        turns = json.loads(chat_history) if chat_history else []
        return turns, chat_history or "", None

    # Speech -> text.
    message = pipe(input_audio)["text"]

    # Text -> ChatGPT reply.
    response = get_response_from_chatbot(message, reset_conversation)

    # Reply -> speech. Only the mel spectrogram is needed from encode_text.
    mel_output, _mel_length, _alignment = tacotron2.encode_text(response)
    wav = hifi_gan.decode_batch(mel_output)

    # Write each reply to its own file so concurrent requests cannot
    # overwrite one another's audio. The file is left on disk for the caller
    # to serve.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        audio_path = tmp.name
    # 22050 is passed as the sample rate -- presumably the output rate of the
    # LJSpeech models; confirm against the model card.
    sf.write(audio_path, wav.squeeze().cpu().numpy(), 22050)

    # Start from the earlier turns unless the conversation is being reset.
    if chat_history and not reset_conversation:
        turns = json.loads(chat_history)
    else:
        turns = []
    turns.append((message, response))

    return turns, json.dumps(turns), audio_path
|
|
|
|
|
# JavaScript executed in the browser when the start button is clicked (it is
# passed as ``_js`` to that button's click handler).
#
# On its first run it hides #page_1 and shows #page_2, sizes the chat area to
# the client height minus 300px, and starts a 500 ms timer that clones newly
# added entries from the hidden #chat_bot element into the visible #chat_bot1
# element and mirrors a text node between them. Later runs do nothing because
# window.gradioEl is already set.
#
# All working variables are declared locally (no implicit globals) and the
# timer is given a function reference rather than a string to evaluate.
start_work = """async() => {
    function isMobile() {
        try {
            document.createEvent("TouchEvent"); return true;
        } catch(e) {
            return false;
        }
    }
    function getClientHeight()
    {
        var clientHeight=0;
        if(document.body.clientHeight&&document.documentElement.clientHeight) {
            clientHeight = (document.body.clientHeight<document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
        } else {
            clientHeight = (document.body.clientHeight>document.documentElement.clientHeight)?document.body.clientHeight:document.documentElement.clientHeight;
        }
        return clientHeight;
    }

    function setNativeValue(element, value) {
        const valueSetter = Object.getOwnPropertyDescriptor(element.__proto__, 'value').set;
        const prototype = Object.getPrototypeOf(element);
        const prototypeValueSetter = Object.getOwnPropertyDescriptor(prototype, 'value').set;

        if (valueSetter && valueSetter !== prototypeValueSetter) {
            prototypeValueSetter.call(element, value);
        } else {
            valueSetter.call(element, value);
        }
    }
    var gradioEl = document.querySelector('body > gradio-app').shadowRoot;
    if (!gradioEl) {
        gradioEl = document.querySelector('body > gradio-app');
    }

    if (typeof window['gradioEl'] === 'undefined') {
        window['gradioEl'] = gradioEl;

        const page1 = window['gradioEl'].querySelectorAll('#page_1')[0];
        const page2 = window['gradioEl'].querySelectorAll('#page_2')[0];

        page1.style.display = "none";
        page2.style.display = "block";
        window['div_count'] = 0;
        window['chat_bot'] = window['gradioEl'].querySelectorAll('#chat_bot')[0];
        window['chat_bot1'] = window['gradioEl'].querySelectorAll('#chat_bot1')[0];
        const chat_row = window['gradioEl'].querySelectorAll('#chat_row')[0];
        const prompt_row = window['gradioEl'].querySelectorAll('#prompt_row')[0];
        window['chat_bot1'].children[1].textContent = '';

        const clientHeight = getClientHeight();
        const new_height = (clientHeight-300) + 'px';
        chat_row.style.height = new_height;
        window['chat_bot'].style.height = new_height;
        window['chat_bot'].children[2].style.height = new_height;
        window['chat_bot1'].style.height = new_height;
        window['chat_bot1'].children[2].style.height = new_height;
        prompt_row.children[0].style.flex = 'auto';
        prompt_row.children[0].style.width = '100%';

        window['checkChange'] = function checkChange() {
            try {
                if (window['chat_bot'].children[2].children[0].children.length > window['div_count']) {
                    const new_len = window['chat_bot'].children[2].children[0].children.length - window['div_count'];
                    for (var i = 0; i < new_len; i++) {
                        const new_div = window['chat_bot'].children[2].children[0].children[window['div_count'] + i].cloneNode(true);
                        window['chat_bot1'].children[2].children[0].appendChild(new_div);
                    }
                    window['div_count'] = window['chat_bot'].children[2].children[0].children.length;
                }
                if (window['chat_bot'].children[0].children.length > 1) {
                    window['chat_bot1'].children[1].textContent = window['chat_bot'].children[0].children[1].textContent;
                } else {
                    window['chat_bot1'].children[1].textContent = '';
                }

            } catch(e) {
            }
        };
        window['checkChange_interval'] = window.setInterval(window['checkChange'], 500);
    }

    return false;
}"""
|
|
|
# Static HTML fragments rendered at the top of the page.
INTRO_HTML = (
    "<p> Demo uses <a href='https://huggingface.co/openai/whisper-base.en' class='underline'>Whisper</a> to convert the input speech"
    " to transcribed text, <a href='https://chat.openai.com/chat' class='underline'>chatGPT</a> to generate responses, and <a"
    " href='https://huggingface.co/speechbrain/tts-tacotron2-ljspeech' class='underline'>tacotron2</a> to convert the response to"
    " output speech: </p>"
)
DIAGRAM_HTML = "<p> <center><img src='https://raw.githubusercontent.com/sanchit-gandhi/codesnippets/main/pipeline.png' width='870'></center> </p>"
DUPLICATE_HTML = (
    "<p>You can duplicate this space and use your own session token: <a style='display:inline-block'"
    " href='https://huggingface.co/spaces/sanchit-gandhi/chatGPT?duplicate=true'><img"
    " src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=10'"
    " alt='Duplicate Space'></a></p>"
)
TOKEN_HELP_HTML = (
    "<p> Instructions on how to obtain your session token can be found in the video <a style='display:inline-block'"
    " href='https://youtu.be/TdNSj_qgdFk?t=175'><font style='color:blue;weight:bold;'>here</font></a>."
    " Add your session token by going to <i>Settings</i> -> <i>New secret</i> and add the token under the name <i>SessionToken</i>. </p>"
)

with gr.Blocks(title="Talk to chatGPT") as demo:
    # Header: title followed by the explanatory HTML fragments, in order.
    gr.Markdown("## Talk to chatGPT ##")
    for fragment in (INTRO_HTML, DIAGRAM_HTML, DUPLICATE_HTML, TOKEN_HELP_HTML):
        gr.HTML(fragment)

    # Page 1: a single start button. Its click runs only the browser-side
    # script in start_work (no Python callback).
    with gr.Group(elem_id="page_1", visible=True) as page_1:
        with gr.Box():
            with gr.Row():
                start_button = gr.Button(
                    "Let's talk to chatGPT! 🗣",
                    elem_id="start-btn",
                    visible=True,
                )
                start_button.click(fn=None, inputs=[], outputs=[], _js=start_work)

    # Page 2: the chat UI, created hidden. The element ids below are the ones
    # the start_work script looks up.
    with gr.Group(elem_id="page_2", visible=False) as page_2:
        with gr.Row(elem_id="chat_row"):
            # "chat_bot" is the hidden component that receives chat()'s
            # output; "chat_bot1" is the visible one.
            chatbot = gr.Chatbot(elem_id="chat_bot", visible=False).style(color_map=("green", "blue"))
            chatbot1 = gr.Chatbot(elem_id="chat_bot1").style(color_map=("green", "blue"))

        with gr.Row():
            prompt_input_audio = gr.Audio(
                source="microphone",
                type="filepath",
                label="Record Audio Input",
            )
            prompt_output_audio = gr.Audio()

        reset_conversation = gr.Checkbox(label="Reset conversation?", value=False)

        with gr.Row(elem_id="prompt_row"):
            # Hidden textbox carrying the JSON-encoded history between turns.
            chat_history = gr.Textbox(lines=4, label="prompt", visible=False)
            submit_btn = gr.Button(value="Send to chatGPT", elem_id="submit-btn").style(
                margin=True,
                rounded=(True, True, True, True),
                width=100,
            )

        # Wire the submit button to chat(): inputs and outputs are listed in
        # the order of chat()'s parameters and return values.
        chat_inputs = [prompt_input_audio, chat_history, reset_conversation]
        chat_outputs = [chatbot, chat_history, prompt_output_audio]
        submit_btn.click(fn=chat, inputs=chat_inputs, outputs=chat_outputs)

demo.launch(debug=True)
|
|