Spaces:

lsdrs
/

voice

Running

App Files Files Community

lsdrs committed on Feb 6

Commit

f1df36a

•

1 Parent(s): 5f5b2d6

build: adicionado parametros

Browse files

Files changed (6) hide show

.gitignore +178 -0
__pycache__/app.cpython-310.pyc +0 -0
__pycache__/utils.cpython-310.pyc +0 -0
app.py +24 -22
requirements.txt +2 -1
utils.py +39 -23

.gitignore ADDED Viewed

	@@ -0,0 +1,178 @@

+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+# End of https://www.toptal.com/developers/gitignore/api/python
+.voice

__pycache__/app.cpython-310.pyc DELETED Viewed

Binary file (1.91 kB)

__pycache__/utils.cpython-310.pyc DELETED Viewed

Binary file (5.62 kB)

app.py CHANGED Viewed

@@ -1,17 +1,25 @@
 """Deploying AI Voice Chatbot Gradio App."""
 from gradio import Audio, Interface, Textbox, Checkbox
 from typing import Tuple
 from utils import (TextGenerationPipeline, from_en_translation,
                    html_audio_autoplay, stt, to_en_translation, tts,
                    tts_polly, tts_to_bytesio, tts_polly_to_bytesio)
-max_answer_length = 100
-desired_language = "pt"
 response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)
-def main(audio: object, use_polly: bool) -> Tuple[str, str, str, object]:
     """Calls functions for deploying gradio app.
     It responds both verbally and in text
@@ -24,44 +32,38 @@ def main(audio: object, use_polly: bool) -> Tuple[str, str, str, object]:
         tuple containing
         - user_speech_text (str) : recognized speech
-        - bot_response_de (str) : translated answer of bot
         - bot_response_en (str) : bot's original answer
         - html (object) : autoplayer for bot's speech
     """
     user_speech_text = stt(audio, desired_language)
-    tranlated_text = to_en_translation(user_speech_text, desired_language)
-    bot_response_en = response_generator_pipe(tranlated_text)
-    bot_response_de = from_en_translation(bot_response_en, desired_language)
     if use_polly:
-        bot_voice = tts_polly(bot_response_de)
         bot_voice_bytes = tts_polly_to_bytesio(bot_voice)
     else:
-        bot_voice = tts(bot_response_de, desired_language)
         bot_voice_bytes = tts_to_bytesio(bot_voice)
     html = html_audio_autoplay(bot_voice_bytes)
-    return user_speech_text, bot_response_de, bot_response_en, html
 demo = Interface(
     fn=main,
     inputs=[
-        Audio(
-            source="microphone",
-            type="filepath",
-        ),
-        Checkbox(
-            value=False,
-            label="Usar Polly?"
-        )
     ],
     outputs=[
         Textbox(label="Você disse: "),
         Textbox(label="AI disse: "),
-        Textbox(label="AI disse (English): "),
         "html",
     ],
     live=True,
     allow_flagging="never")
-demo.launch(debug=True)

 """Deploying AI Voice Chatbot Gradio App."""
 from gradio import Audio, Interface, Textbox, Checkbox
 from typing import Tuple
+from dotenv import load_dotenv
+import os
 from utils import (TextGenerationPipeline, from_en_translation,
                    html_audio_autoplay, stt, to_en_translation, tts,
                    tts_polly, tts_to_bytesio, tts_polly_to_bytesio)
+load_dotenv()
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Args:
        name: name of the environment variable to read.
        default: value returned when the variable is unset or blank.

    Returns:
        The parsed integer, or *default*.

    Raises:
        ValueError: if the variable is set to something that is not an
            integer.
    """
    raw = os.getenv(name)
    if raw is None or not raw.strip():
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


# os.getenv returns a string whenever the variable is set, so the value is
# converted explicitly: max_length is an int whether or not it is configured.
max_answer_length = _env_int('MAX_ANSWER_LENGTH', 100)
desired_language = os.getenv('DESIRED_LANGUAGE', 'pt')
# 'POLLY_LANGUAGE' is the preferred name; the doubled
# 'POLLY_LANGUAGE_LANGUAGE' spelling is still honoured so that existing
# deployments keep working.
polly_language = os.getenv(
    'POLLY_LANGUAGE', os.getenv('POLLY_LANGUAGE_LANGUAGE', 'pt-BR'))
 response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)
def main(audio: object, use_polly: bool) -> Tuple[str, str, object]:
    """Run one voice-chat turn: transcribe, answer and synthesize speech.

    The recording is transcribed with ``stt``, translated to English for the
    response generator, and the generated reply is translated back to
    ``desired_language`` before being turned into audio.

    Args:
        audio: recording handed over by the Gradio ``Audio`` input, or
            ``None`` when there is nothing to process.
        use_polly: synthesize the reply with ``tts_polly`` when true,
            otherwise with ``tts``.

    Returns:
        tuple containing
        - user_speech_text (str) : recognized speech
        - bot_response_pt (str) : bot's answer translated to
          ``desired_language``
        - html (object) : autoplayer for bot's speech

        Three empty strings are returned when ``audio`` is ``None``.
    """
    # Nothing recorded: skip the whole pipeline and clear the outputs.
    if audio is None:
        return "", "", ""

    user_speech_text = stt(audio, desired_language)
    # The response generator is fed English text, so translate in and out.
    translated_text = to_en_translation(user_speech_text, desired_language)
    bot_response_en = response_generator_pipe(translated_text)
    bot_response_pt = from_en_translation(bot_response_en, desired_language)

    if use_polly:
        bot_voice = tts_polly(bot_response_pt, polly_language)
        bot_voice_bytes = tts_polly_to_bytesio(bot_voice)
    else:
        bot_voice = tts(bot_response_pt, desired_language)
        bot_voice_bytes = tts_to_bytesio(bot_voice)

    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_pt, html
 # Input widgets, in the positional order expected by main(audio, use_polly).
 interface_inputs = [
     Audio(source="microphone", type="filepath"),
     Checkbox(value=False, label="Usar Polly?"),
 ]
 # Output widgets, matching main's return tuple: transcript, reply, player.
 interface_outputs = [
     Textbox(label="Você disse: "),
     Textbox(label="AI disse: "),
     "html",
 ]
 demo = Interface(
     fn=main,
     inputs=interface_inputs,
     outputs=interface_outputs,
     live=True,
     allow_flagging="never",
 )
 demo.launch(debug=False)

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ SpeechRecognition==3.9.0
 mtranslate==1.8
 gTTS==2.3.0
 transformers==4.25.1
-boto3

 mtranslate==1.8
 gTTS==2.3.0
 transformers==4.25.1
+boto3
+python-dotenv

utils.py CHANGED Viewed

@@ -1,11 +1,8 @@
 """Some utility functions for the app."""
 from base64 import b64encode
 from io import BytesIO
 import os
 import sys
-from tempfile import gettempdir
 from gtts import gTTS
 from mtranslate import translate
 from speech_recognition import AudioFile, Recognizer
@@ -13,10 +10,21 @@ from transformers import (BlenderbotSmallForConditionalGeneration,
                           BlenderbotSmallTokenizer)
 from contextlib import closing
 import boto3
-client = boto3.client('polly')
 def stt(audio: object, language: str) -> str:
     """Converts speech to text.
@@ -132,16 +140,27 @@ def tts(text: str, language: str) -> object:
     return gTTS(text=text, lang=language, slow=False)
-def tts_polly(text: str) -> object:
     response = client.synthesize_speech(
         Engine='neural',
-        LanguageCode='pt-BR',
         OutputFormat='mp3',
-        Text=text,
-        VoiceId='Camila')
     return response
 def tts_polly_to_bytesio(polly_object: object) -> bytes:
     # Access the audio stream from the response
     if "AudioStream" in polly_object:
@@ -149,26 +168,23 @@ def tts_polly_to_bytesio(polly_object: object) -> bytes:
         # number of parallel connections. Here we are using contextlib.closing to
         # ensure the close method of the stream object will be called automatically
         # at the end of the with statement's scope.
-            with closing(polly_object["AudioStream"]) as stream:
-                try:
-                    # output = os.path.join(gettempdir(), "speech.mp3")
-                    bytes_object = BytesIO()
-                    bytes_object.write(stream.read())
-                    # # Open a file for writing the output as a binary stream
-                    # with open(output, "wb") as file:
-                    #     file.write(bytes_object)
-                    bytes_object.seek(0)
-                    return bytes_object.getvalue()
-                except IOError as error:
-                    # Could not write to file, exit gracefully
-                    print(error)
-                    sys.exit(-1)
     else:
         # The response didn't contain audio data, exit gracefully
         print("Could not stream audio")
         sys.exit(-1)
 def tts_to_bytesio(tts_object: object) -> bytes:
     """Converts tts object to bytes.

 """Some utility functions for the app."""
 from base64 import b64encode
 from io import BytesIO
 import os
 import sys
 from gtts import gTTS
 from mtranslate import translate
 from speech_recognition import AudioFile, Recognizer
                           BlenderbotSmallTokenizer)
 from contextlib import closing
 import boto3
+from botocore.config import Config
+import time
def log_execution_time(func):
    """Decorate *func* so that each successful call prints its duration.

    The wrapper forwards all positional and keyword arguments, returns the
    wrapped callable's result unchanged and prints the elapsed time in
    seconds once the call has returned. Nothing is printed when the call
    raises.

    Args:
        func: callable to be timed.

    Returns:
        A wrapper that carries the name and docstring of *func*.
    """
    from functools import wraps  # local import keeps this block self-contained

    # Callables such as functools.partial objects have no __name__.
    label = getattr(func, "__name__", repr(func))

    @wraps(func)  # keep __name__/__doc__ of the decorated function
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measured interval is not affected
        # by system clock adjustments.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time of {label}: {execution_time} seconds")
        return result

    return wrapper
 def stt(audio: object, language: str) -> str:
     """Converts speech to text.
     return gTTS(text=text, lang=language, slow=False)
# Polly clients keyed by region name, created on first use and then reused.
_POLLY_CLIENTS = {}


def _get_polly_client(region_name: str):
    """Return the Polly client for *region_name*, creating it only once."""
    client = _POLLY_CLIENTS.get(region_name)
    if client is None:
        client = boto3.client('polly', config=Config(region_name=region_name))
        _POLLY_CLIENTS[region_name] = client
    return client


def tts_polly(text: str, language: str, voice_id: str = 'Camila',
              engine: str = 'neural') -> object:
    """Synthesize *text* to MP3 speech with the boto3 Polly client.

    The AWS region is read from the ``AWS_REGION`` environment variable on
    every call and defaults to ``'us-east-1'``.

    Args:
        text: text to be spoken.
        language: value passed to Polly as ``LanguageCode``.
        voice_id: value passed to Polly as ``VoiceId``.
            NOTE(review): the default voice is presumably tied to one
            language; pass a matching voice for other language codes.
            Confirm against the Polly voice list.
        engine: value passed to Polly as ``Engine``.

    Returns:
        The response returned by ``synthesize_speech``;
        ``tts_polly_to_bytesio`` reads its ``"AudioStream"`` entry.
    """
    region_name = os.getenv('AWS_REGION', 'us-east-1')
    # Reuse one client per region instead of building a new one per request.
    client = _get_polly_client(region_name)
    return client.synthesize_speech(
        Engine=engine,
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language,
        Text=text)
 def tts_polly_to_bytesio(polly_object: object) -> bytes:
     """Return the audio bytes carried by a Polly speech response.
 
     Args:
         polly_object: mapping returned by ``tts_polly``; its
             ``"AudioStream"`` entry must be a readable, closable stream.
 
     Returns:
         The full content of the audio stream as bytes.
 
     Raises:
         ValueError: if the response has no ``"AudioStream"`` entry.
         OSError: if reading the stream fails.
     """
     # Raise instead of calling sys.exit(): this helper runs inside the web
     # app, and a missing stream should fail one request, not the process.
     if "AudioStream" not in polly_object:
         raise ValueError("Could not stream audio")
     # closing() calls the stream's close() when the with-block ends, even if
     # read() raises, so the stream is always released.
     with closing(polly_object["AudioStream"]) as stream:
         return bytes(stream.read())
 def tts_to_bytesio(tts_object: object) -> bytes:
     """Converts tts object to bytes.