lsdrs committed on
Commit
f1df36a
1 Parent(s): 5f5b2d6

build: adicionado parametros

Browse files
.gitignore ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Created by https://www.toptal.com/developers/gitignore/api/python
2
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python
3
+
4
+ ### Python ###
5
+ # Byte-compiled / optimized / DLL files
6
+ __pycache__/
7
+ *.py[cod]
8
+ *$py.class
9
+
10
+ # C extensions
11
+ *.so
12
+
13
+ # Distribution / packaging
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .nox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ *.py,cover
54
+ .hypothesis/
55
+ .pytest_cache/
56
+ cover/
57
+
58
+ # Translations
59
+ *.mo
60
+ *.pot
61
+
62
+ # Django stuff:
63
+ *.log
64
+ local_settings.py
65
+ db.sqlite3
66
+ db.sqlite3-journal
67
+
68
+ # Flask stuff:
69
+ instance/
70
+ .webassets-cache
71
+
72
+ # Scrapy stuff:
73
+ .scrapy
74
+
75
+ # Sphinx documentation
76
+ docs/_build/
77
+
78
+ # PyBuilder
79
+ .pybuilder/
80
+ target/
81
+
82
+ # Jupyter Notebook
83
+ .ipynb_checkpoints
84
+
85
+ # IPython
86
+ profile_default/
87
+ ipython_config.py
88
+
89
+ # pyenv
90
+ # For a library or package, you might want to ignore these files since the code is
91
+ # intended to run in multiple environments; otherwise, check them in:
92
+ # .python-version
93
+
94
+ # pipenv
95
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
97
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
98
+ # install all needed dependencies.
99
+ #Pipfile.lock
100
+
101
+ # poetry
102
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
104
+ # commonly ignored for libraries.
105
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106
+ #poetry.lock
107
+
108
+ # pdm
109
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110
+ #pdm.lock
111
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112
+ # in version control.
113
+ # https://pdm.fming.dev/#use-with-ide
114
+ .pdm.toml
115
+
116
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117
+ __pypackages__/
118
+
119
+ # Celery stuff
120
+ celerybeat-schedule
121
+ celerybeat.pid
122
+
123
+ # SageMath parsed files
124
+ *.sage.py
125
+
126
+ # Environments
127
+ .env
128
+ .venv
129
+ env/
130
+ venv/
131
+ ENV/
132
+ env.bak/
133
+ venv.bak/
134
+
135
+ # Spyder project settings
136
+ .spyderproject
137
+ .spyproject
138
+
139
+ # Rope project settings
140
+ .ropeproject
141
+
142
+ # mkdocs documentation
143
+ /site
144
+
145
+ # mypy
146
+ .mypy_cache/
147
+ .dmypy.json
148
+ dmypy.json
149
+
150
+ # Pyre type checker
151
+ .pyre/
152
+
153
+ # pytype static type analyzer
154
+ .pytype/
155
+
156
+ # Cython debug symbols
157
+ cython_debug/
158
+
159
+ # PyCharm
160
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
163
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164
+ #.idea/
165
+
166
+ ### Python Patch ###
167
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
168
+ poetry.toml
169
+
170
+ # ruff
171
+ .ruff_cache/
172
+
173
+ # LSP config files
174
+ pyrightconfig.json
175
+
176
+ # End of https://www.toptal.com/developers/gitignore/api/python
177
+
178
+ .voice
__pycache__/app.cpython-310.pyc DELETED
Binary file (1.91 kB)
 
__pycache__/utils.cpython-310.pyc DELETED
Binary file (5.62 kB)
 
app.py CHANGED
@@ -1,17 +1,25 @@
1
  """Deploying AI Voice Chatbot Gradio App."""
2
  from gradio import Audio, Interface, Textbox, Checkbox
3
  from typing import Tuple
4
-
 
5
  from utils import (TextGenerationPipeline, from_en_translation,
6
  html_audio_autoplay, stt, to_en_translation, tts,
7
  tts_polly, tts_to_bytesio, tts_polly_to_bytesio)
8
 
9
- max_answer_length = 100
10
- desired_language = "pt"
 
 
 
 
11
  response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)
12
 
13
 
14
- def main(audio: object, use_polly: bool) -> Tuple[str, str, str, object]:
 
 
 
15
  """Calls functions for deploying gradio app.
16
 
17
  It responds both verbally and in text
@@ -24,44 +32,38 @@ def main(audio: object, use_polly: bool) -> Tuple[str, str, str, object]:
24
  tuple containing
25
 
26
  - user_speech_text (str) : recognized speech
27
- - bot_response_de (str) : translated answer of bot
28
  - bot_response_en (str) : bot's original answer
29
  - html (object) : autoplayer for bot's speech
30
  """
31
  user_speech_text = stt(audio, desired_language)
32
- tranlated_text = to_en_translation(user_speech_text, desired_language)
33
- bot_response_en = response_generator_pipe(tranlated_text)
34
- bot_response_de = from_en_translation(bot_response_en, desired_language)
 
35
  if use_polly:
36
- bot_voice = tts_polly(bot_response_de)
37
  bot_voice_bytes = tts_polly_to_bytesio(bot_voice)
38
  else:
39
- bot_voice = tts(bot_response_de, desired_language)
40
  bot_voice_bytes = tts_to_bytesio(bot_voice)
41
-
42
  html = html_audio_autoplay(bot_voice_bytes)
43
- return user_speech_text, bot_response_de, bot_response_en, html
44
 
45
 
46
  demo = Interface(
47
  fn=main,
48
  inputs=[
49
- Audio(
50
- source="microphone",
51
- type="filepath",
52
- ),
53
- Checkbox(
54
- value=False,
55
- label="Usar Polly?"
56
- )
57
  ],
58
  outputs=[
59
  Textbox(label="Você disse: "),
60
  Textbox(label="AI disse: "),
61
- Textbox(label="AI disse (English): "),
62
  "html",
63
  ],
64
  live=True,
65
  allow_flagging="never")
66
 
67
- demo.launch(debug=True)
 
1
  """Deploying AI Voice Chatbot Gradio App."""
2
  from gradio import Audio, Interface, Textbox, Checkbox
3
  from typing import Tuple
4
+ from dotenv import load_dotenv
5
+ import os
6
  from utils import (TextGenerationPipeline, from_en_translation,
7
  html_audio_autoplay, stt, to_en_translation, tts,
8
  tts_polly, tts_to_bytesio, tts_polly_to_bytesio)
9
 
10
+ load_dotenv()
11
+
12
# Runtime settings, overridable through environment variables (or a .env file).
# os.getenv always returns a str when the variable is set, so the numeric
# setting is converted explicitly; a non-numeric value fails fast at start-up.
max_answer_length = int(os.getenv('MAX_ANSWER_LENGTH', '100'))
desired_language = os.getenv('DESIRED_LANGUAGE', 'pt')
# 'POLLY_LANGUAGE' is the preferred key; the original, accidentally doubled
# 'POLLY_LANGUAGE_LANGUAGE' key is still honoured for backward compatibility.
polly_language = os.getenv(
    'POLLY_LANGUAGE',
    os.getenv('POLLY_LANGUAGE_LANGUAGE', 'pt-BR'),
)
15
+
16
  response_generator_pipe = TextGenerationPipeline(max_length=max_answer_length)
17
 
18
 
19
+ def main(audio: object, use_polly: bool) -> Tuple[str, str, object]:
20
+ if audio is None:
21
+ return "", "", ""
22
+
23
  """Calls functions for deploying gradio app.
24
 
25
  It responds both verbally and in text
 
32
  tuple containing
33
 
34
  - user_speech_text (str) : recognized speech
35
+ - bot_response_pt (str) : translated answer of bot
36
  - bot_response_en (str) : bot's original answer
37
  - html (object) : autoplayer for bot's speech
38
  """
39
  user_speech_text = stt(audio, desired_language)
40
+ translated_text = to_en_translation(user_speech_text, desired_language)
41
+ bot_response_en = response_generator_pipe(translated_text)
42
+ bot_response_pt = from_en_translation(bot_response_en, desired_language)
43
+
44
  if use_polly:
45
+ bot_voice = tts_polly(bot_response_pt, polly_language)
46
  bot_voice_bytes = tts_polly_to_bytesio(bot_voice)
47
  else:
48
+ bot_voice = tts(bot_response_pt, desired_language)
49
  bot_voice_bytes = tts_to_bytesio(bot_voice)
50
+
51
  html = html_audio_autoplay(bot_voice_bytes)
52
+ return user_speech_text, bot_response_pt, html
53
 
54
 
55
  demo = Interface(
56
  fn=main,
57
  inputs=[
58
+ Audio(source="microphone", type="filepath"),
59
+ Checkbox(value=False, label="Usar Polly?")
 
 
 
 
 
 
60
  ],
61
  outputs=[
62
  Textbox(label="Você disse: "),
63
  Textbox(label="AI disse: "),
 
64
  "html",
65
  ],
66
  live=True,
67
  allow_flagging="never")
68
 
69
+ demo.launch(debug=False)
requirements.txt CHANGED
@@ -5,4 +5,5 @@ SpeechRecognition==3.9.0
5
  mtranslate==1.8
6
  gTTS==2.3.0
7
  transformers==4.25.1
8
- boto3
 
 
5
  mtranslate==1.8
6
  gTTS==2.3.0
7
  transformers==4.25.1
8
+ boto3
9
+ python-dotenv
utils.py CHANGED
@@ -1,11 +1,8 @@
1
  """Some utility functions for the app."""
2
  from base64 import b64encode
3
  from io import BytesIO
4
-
5
  import os
6
  import sys
7
- from tempfile import gettempdir
8
-
9
  from gtts import gTTS
10
  from mtranslate import translate
11
  from speech_recognition import AudioFile, Recognizer
@@ -13,10 +10,21 @@ from transformers import (BlenderbotSmallForConditionalGeneration,
13
  BlenderbotSmallTokenizer)
14
  from contextlib import closing
15
  import boto3
 
16
 
17
- client = boto3.client('polly')
18
 
19
 
 
 
 
 
 
 
 
 
 
 
20
  def stt(audio: object, language: str) -> str:
21
  """Converts speech to text.
22
 
@@ -132,16 +140,27 @@ def tts(text: str, language: str) -> object:
132
  return gTTS(text=text, lang=language, slow=False)
133
 
134
 
135
- def tts_polly(text: str) -> object:
 
 
 
 
 
 
 
 
 
 
136
  response = client.synthesize_speech(
137
  Engine='neural',
138
- LanguageCode='pt-BR',
139
  OutputFormat='mp3',
140
- Text=text,
141
- VoiceId='Camila')
142
-
 
143
  return response
144
 
 
145
  def tts_polly_to_bytesio(polly_object: object) -> bytes:
146
  # Access the audio stream from the response
147
  if "AudioStream" in polly_object:
@@ -149,26 +168,23 @@ def tts_polly_to_bytesio(polly_object: object) -> bytes:
149
  # number of parallel connections. Here we are using contextlib.closing to
150
  # ensure the close method of the stream object will be called automatically
151
  # at the end of the with statement's scope.
152
- with closing(polly_object["AudioStream"]) as stream:
153
- try:
154
- # output = os.path.join(gettempdir(), "speech.mp3")
155
- bytes_object = BytesIO()
156
- bytes_object.write(stream.read())
157
- # # Open a file for writing the output as a binary stream
158
- # with open(output, "wb") as file:
159
- # file.write(bytes_object)
160
- bytes_object.seek(0)
161
- return bytes_object.getvalue()
162
- except IOError as error:
163
- # Could not write to file, exit gracefully
164
- print(error)
165
- sys.exit(-1)
166
 
167
  else:
168
  # The response didn't contain audio data, exit gracefully
169
  print("Could not stream audio")
170
  sys.exit(-1)
171
 
 
172
  def tts_to_bytesio(tts_object: object) -> bytes:
173
  """Converts tts object to bytes.
174
 
 
1
  """Some utility functions for the app."""
2
  from base64 import b64encode
3
  from io import BytesIO
 
4
  import os
5
  import sys
 
 
6
  from gtts import gTTS
7
  from mtranslate import translate
8
  from speech_recognition import AudioFile, Recognizer
 
10
  BlenderbotSmallTokenizer)
11
  from contextlib import closing
12
  import boto3
13
+ from botocore.config import Config
14
 
15
+ import time
16
 
17
 
18
def log_execution_time(func):
    """Decorate ``func`` so that every successful call prints its duration.

    Args:
        func: The callable to time.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func``, prints the elapsed time in seconds, and returns ``func``'s
        result unchanged. The wrapper keeps ``func``'s name and docstring.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the measurement is not affected by
        # system clock adjustments.
        start_time = time.perf_counter()
        result = func(*args, **kwargs)
        execution_time = time.perf_counter() - start_time
        print(f"Execution time of {func.__name__}: {execution_time} seconds")
        return result

    return wrapper
27
+
28
  def stt(audio: object, language: str) -> str:
29
  """Converts speech to text.
30
 
 
140
  return gTTS(text=text, lang=language, slow=False)
141
 
142
 
143
def tts_polly(text: str, language: str, voice_id: str = 'Camila') -> object:
    """Synthesize speech for ``text`` with Amazon Polly.

    The Polly client is created on every call, using the region taken from
    the ``AWS_REGION`` environment variable (default ``'us-east-1'``).

    Args:
        text: Text to be spoken.
        language: Value passed to Polly as ``LanguageCode`` (e.g. ``'pt-BR'``).
        voice_id: Value passed to Polly as ``VoiceId``. Defaults to
            ``'Camila'``, the voice this function previously hard-coded;
            pass a different voice when ``language`` is not one that voice
            supports.

    Returns:
        The response returned by ``synthesize_speech`` as an MP3 request
        using the neural engine; ``tts_polly_to_bytesio`` reads its
        ``"AudioStream"`` entry.
    """
    polly_config = Config(
        region_name=os.getenv('AWS_REGION', 'us-east-1'),
    )
    client = boto3.client('polly', config=polly_config)

    return client.synthesize_speech(
        Engine='neural',
        OutputFormat='mp3',
        VoiceId=voice_id,
        LanguageCode=language,
        Text=text,
    )
162
 
163
+
164
  def tts_polly_to_bytesio(polly_object: object) -> bytes:
165
  # Access the audio stream from the response
166
  if "AudioStream" in polly_object:
 
168
  # number of parallel connections. Here we are using contextlib.closing to
169
  # ensure the close method of the stream object will be called automatically
170
  # at the end of the with statement's scope.
171
+ with closing(polly_object["AudioStream"]) as stream:
172
+ try:
173
+ bytes_object = BytesIO()
174
+ bytes_object.write(stream.read())
175
+ bytes_object.seek(0)
176
+ return bytes_object.getvalue()
177
+ except IOError as error:
178
+ # Could not write to bytes, exit gracefully
179
+ print(error)
180
+ sys.exit(-1)
 
 
 
 
181
 
182
  else:
183
  # The response didn't contain audio data, exit gracefully
184
  print("Could not stream audio")
185
  sys.exit(-1)
186
 
187
+
188
  def tts_to_bytesio(tts_object: object) -> bytes:
189
  """Converts tts object to bytes.
190