Spaces:

lsdrs
/

voice

Sleeping

File size: 2,286 Bytes

d867b41
5f5b2d6
d867b41
f1df36a
 
d867b41
 
5f5b2d6
d867b41
f1df36a
 
fdb81f3
f1df36a
 
fdb81f3
f1df36a
d867b41
 
 
fdb81f3
f1df36a
 
 
d867b41
 
 
 
 
 
 
5f5b2d6
d867b41
 
 
 
f1df36a
d867b41
 
 
 
f1df36a
 
 
 
5f5b2d6
f1df36a
5f5b2d6
 
f1df36a
5f5b2d6
f1df36a
d867b41
f1df36a
d867b41
 
 
 
 
f1df36a
fdb81f3
d867b41
 
5f5b2d6
 
d867b41
 
 
 
 
f1df36a

"""Deploying AI Voice Chatbot Gradio App."""
from gradio import Audio, Interface, Textbox, Checkbox
from typing import Tuple
from dotenv import load_dotenv
import os
from utils import (TextGenerationPipeline, from_en_translation,
                   html_audio_autoplay, stt, to_en_translation, tts,
                   tts_polly, tts_to_bytesio, tts_polly_to_bytesio)

# Populate the process environment from a local .env file (if present)
# before the os.getenv() lookups below read their settings.
load_dotenv()

def str_to_bool(value, default: bool = False) -> bool:
    """Interpret an environment-variable string as a boolean.

    ``os.getenv`` returns a ``str`` whenever the variable is set, so values
    such as ``"false"`` or ``"0"`` would be truthy if used directly.

    Args:
        value: raw string read from the environment, or ``None`` when the
            variable is not set.
        default: result to use when ``value`` is ``None``.

    Returns:
        ``False`` for an empty string, ``0``, ``false``, ``no`` or ``off``
        (case-insensitive, surrounding whitespace ignored); ``True`` for any
        other string; ``default`` when ``value`` is ``None``.
    """
    if value is None:
        return default
    return value.strip().lower() not in {'', '0', 'false', 'no', 'off'}


# Maximum length handed to the text-generation pipeline.
max_answer_length = int(os.getenv('MAX_ANSWER_LENGTH', 100))
# Language code used for speech recognition, translation and the default TTS.
desired_language = os.getenv('DESIRED_LANGUAGE', 'pt')
# NOTE(review): 'POLLY_LANGUAGE_LANGUAGE' looks like a typo of
# 'POLLY_LANGUAGE'; the name is kept because existing deployments may
# already set it -- confirm before renaming.
polly_language = os.getenv('POLLY_LANGUAGE_LANGUAGE', 'pt-BR')
# Parsed into a real bool so that USE_POLLY=false / USE_POLLY=0 disable Polly.
use_polly = str_to_bool(os.getenv('USE_POLLY'))

# Single pipeline instance created at import time and reused by every call
# to main(); its max_length comes from the MAX_ANSWER_LENGTH setting.
response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)


def main(audio: object, use_polly: bool = use_polly) -> Tuple[str, str, object]:
    """Answer a recorded utterance both in text and as synthesized speech.

    The recording is transcribed, translated to English, passed to the
    text-generation pipeline, and the reply is translated back to
    ``desired_language`` and turned into audio.

    Args:
        audio (object): recorded speech of user; ``None`` when there is
            no recording to process.
        use_polly (bool): use aws polly (``tts_polly``) instead of ``tts``
            for speech synthesis.

    Returns:
        tuple containing

        - user_speech_text (str) : recognized speech
        - bot_response_pt (str) : bot's answer translated to
          ``desired_language``
        - html (object) : autoplayer for bot's speech

        When ``audio`` is ``None`` three empty strings are returned.
    """
    # Nothing recorded: clear all three outputs instead of running the
    # speech/translation/generation chain on missing input.
    if audio is None:
        return "", "", ""

    user_speech_text = stt(audio, desired_language)
    translated_text = to_en_translation(user_speech_text, desired_language)
    bot_response_en = response_generator_pipe(translated_text)
    bot_response_pt = from_en_translation(bot_response_en, desired_language)

    # Each TTS backend has its own converter to an in-memory byte stream.
    if use_polly:
        bot_voice = tts_polly(bot_response_pt, polly_language)
        bot_voice_bytes = tts_polly_to_bytesio(bot_voice)
    else:
        bot_voice = tts(bot_response_pt, desired_language)
        bot_voice_bytes = tts_to_bytesio(bot_voice)

    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_pt, html


# Input widgets, in the order main() receives them: the microphone
# recording and the Polly toggle.
input_components = [
    Audio(source="microphone", type="filepath"),
    Checkbox(value=use_polly, label="Usar Polly?"),
]

# Output widgets, in the order main() returns them: recognized speech,
# translated reply, and the HTML audio autoplayer.
output_components = [
    Textbox(label="Você disse: "),
    Textbox(label="AI disse: "),
    "html",
]

demo = Interface(
    fn=main,
    inputs=input_components,
    outputs=output_components,
    live=True,
    allow_flagging="never",
)

demo.launch(debug=False)