Spaces:
Sleeping
Sleeping
stefantaubert
committed on
Commit
•
f7244fb
1
Parent(s):
e3bcf30
update
Browse files
- app.py +23 -2
- en_tts_app/__init__.py +4 -2
- en_tts_app/globals.py +4 -17
- en_tts_app/logging_configuration.py +2 -5
- en_tts_app/main.py +38 -4
- en_tts_gr/app.py +12 -11
app.py
CHANGED
@@ -1,3 +1,24 @@
|
|
1 |
-
|
|
|
2 |
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
from functools import partial
|
3 |
|
4 |
+
from en_tts_app import initialize_app, run_main
|
5 |
+
from en_tts_gr import build_interface
|
6 |
+
|
7 |
+
exit_code = initialize_app()
|
8 |
+
if exit_code > 0:
|
9 |
+
sys.exit(exit_code)
|
10 |
+
|
11 |
+
interface = build_interface(cache_examples=False)
|
12 |
+
interface.queue()
|
13 |
+
|
14 |
+
launch_method = partial(
|
15 |
+
interface.launch,
|
16 |
+
share=False,
|
17 |
+
debug=True,
|
18 |
+
inbrowser=True,
|
19 |
+
quiet=False,
|
20 |
+
show_api=False,
|
21 |
+
)
|
22 |
+
|
23 |
+
exit_code = run_main(launch_method)
|
24 |
+
sys.exit(exit_code)
|
en_tts_app/__init__.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
from en_tts_app.app import initialize_app, run_main
|
2 |
-
from en_tts_app.globals import get_conf_dir, get_log_path, get_work_dir
|
3 |
-
from en_tts_app.
|
|
|
|
|
|
1 |
from en_tts_app.app import initialize_app, run_main
|
2 |
+
from en_tts_app.globals import APP_NAME, APP_VERSION, get_conf_dir, get_log_path, get_work_dir
|
3 |
+
from en_tts_app.logging_configuration import get_app_logger, get_file_logger
|
4 |
+
from en_tts_app.main import (load_models_to_cache, reset_log, reset_work_dir, synthesize_english,
|
5 |
+
synthesize_ipa)
|
en_tts_app/globals.py
CHANGED
@@ -1,8 +1,10 @@
|
|
1 |
-
import
|
2 |
from pathlib import Path
|
3 |
from tempfile import gettempdir
|
4 |
|
5 |
-
|
|
|
|
|
6 |
|
7 |
|
8 |
def get_conf_dir() -> Path:
|
@@ -17,18 +19,3 @@ def get_work_dir() -> Path:
|
|
17 |
|
18 |
def get_log_path() -> Path:
|
19 |
return Path(gettempdir()) / "en-tts.log"
|
20 |
-
|
21 |
-
|
22 |
-
def reset_log() -> None:
|
23 |
-
get_log_path().write_text("", "utf-8")
|
24 |
-
|
25 |
-
|
26 |
-
def reset_work_dir():
|
27 |
-
root_logger = get_cli_logger()
|
28 |
-
work_dir = get_work_dir()
|
29 |
-
|
30 |
-
if work_dir.is_dir():
|
31 |
-
root_logger.debug("Deleting working directory ...")
|
32 |
-
shutil.rmtree(work_dir)
|
33 |
-
root_logger.debug("Creating working directory ...")
|
34 |
-
work_dir.mkdir(parents=False, exist_ok=False)
|
|
|
1 |
+
from importlib.metadata import version
|
2 |
from pathlib import Path
|
3 |
from tempfile import gettempdir
|
4 |
|
5 |
+
APP_NAME = "en-tts"
|
6 |
+
|
7 |
+
APP_VERSION = version(APP_NAME)
|
8 |
|
9 |
|
10 |
def get_conf_dir() -> Path:
|
|
|
19 |
|
20 |
def get_log_path() -> Path:
|
21 |
return Path(gettempdir()) / "en-tts.log"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
en_tts_app/logging_configuration.py
CHANGED
@@ -2,14 +2,11 @@ import logging
|
|
2 |
import os
|
3 |
import platform
|
4 |
import sys
|
5 |
-
from importlib.metadata import version
|
6 |
from logging import Formatter, Handler, Logger, StreamHandler, getLogger
|
7 |
from pathlib import Path
|
8 |
from pkgutil import iter_modules
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
__version__ = version(__APP_NAME)
|
13 |
|
14 |
|
15 |
class ConsoleFormatter(logging.Formatter):
|
@@ -129,7 +126,7 @@ def log_sysinfo():
|
|
129 |
flogger = get_file_logger()
|
130 |
|
131 |
sys_version = sys.version.replace('\n', '')
|
132 |
-
flogger.debug(f"CLI version: {
|
133 |
flogger.debug(f"Python version: {sys_version}")
|
134 |
flogger.debug("Modules: %s", ', '.join(sorted(p.name for p in iter_modules())))
|
135 |
|
|
|
2 |
import os
|
3 |
import platform
|
4 |
import sys
|
|
|
5 |
from logging import Formatter, Handler, Logger, StreamHandler, getLogger
|
6 |
from pathlib import Path
|
7 |
from pkgutil import iter_modules
|
8 |
|
9 |
+
from en_tts_app.globals import APP_VERSION
|
|
|
|
|
10 |
|
11 |
|
12 |
class ConsoleFormatter(logging.Formatter):
|
|
|
126 |
flogger = get_file_logger()
|
127 |
|
128 |
sys_version = sys.version.replace('\n', '')
|
129 |
+
flogger.debug(f"CLI version: {APP_VERSION}")
|
130 |
flogger.debug(f"Python version: {sys_version}")
|
131 |
flogger.debug("Modules: %s", ', '.join(sorted(p.name for p in iter_modules())))
|
132 |
|
en_tts_app/main.py
CHANGED
@@ -3,6 +3,7 @@ import shutil
|
|
3 |
from pathlib import Path
|
4 |
from typing import Dict, Optional
|
5 |
|
|
|
6 |
from ordered_set import OrderedSet
|
7 |
from pronunciation_dictionary import PronunciationDict, SerializationOptions, save_dict
|
8 |
|
@@ -10,28 +11,61 @@ from en_tts.helper import get_default_device, normalize_audio
|
|
10 |
from en_tts.io import save_audio
|
11 |
from en_tts.synthesizer import Synthesizer
|
12 |
from en_tts.transcriber import Transcriber
|
13 |
-
from en_tts_app.globals import get_conf_dir,
|
14 |
from en_tts_app.logging_configuration import get_app_logger, get_file_logger, log_sysinfo
|
15 |
|
16 |
CACHE_TRANSCRIBER = "transcriber"
|
17 |
CACHE_SYNTHESIZER = "synthesizer"
|
18 |
|
19 |
|
20 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
cli_logger = get_app_logger()
|
22 |
cache = {}
|
23 |
|
24 |
conf_dir = get_conf_dir()
|
25 |
-
device = get_default_device()
|
26 |
|
27 |
cli_logger.info("Initializing Transcriber...")
|
28 |
cache[CACHE_TRANSCRIBER] = Transcriber(conf_dir)
|
29 |
|
|
|
|
|
|
|
30 |
cli_logger.info("Initializing Synthesizer...")
|
31 |
-
cache[CACHE_SYNTHESIZER] = Synthesizer(conf_dir,
|
32 |
return cache
|
33 |
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def synthesize_english(text: str, cache: Dict, *, max_decoder_steps: int = 5000, sigma: float = 1.0, denoiser_strength: float = 0.0005, seed: int = 0, silence_sentences: float = 0.4, silence_paragraphs: float = 1.0, loglevel: int = 2, skip_normalization: bool = False, skip_sentence_separation: bool = False, custom_output: Optional[Path] = None) -> Path:
|
36 |
cli_logger = get_app_logger()
|
37 |
reset_log()
|
|
|
3 |
from pathlib import Path
|
4 |
from typing import Dict, Optional
|
5 |
|
6 |
+
import torch
|
7 |
from ordered_set import OrderedSet
|
8 |
from pronunciation_dictionary import PronunciationDict, SerializationOptions, save_dict
|
9 |
|
|
|
11 |
from en_tts.io import save_audio
|
12 |
from en_tts.synthesizer import Synthesizer
|
13 |
from en_tts.transcriber import Transcriber
|
14 |
+
from en_tts_app.globals import get_conf_dir, get_log_path, get_work_dir
|
15 |
from en_tts_app.logging_configuration import get_app_logger, get_file_logger, log_sysinfo
|
16 |
|
17 |
CACHE_TRANSCRIBER = "transcriber"
|
18 |
CACHE_SYNTHESIZER = "synthesizer"
|
19 |
|
20 |
|
21 |
+
def reset_work_dir():
|
22 |
+
root_logger = get_app_logger()
|
23 |
+
work_dir = get_work_dir()
|
24 |
+
|
25 |
+
if work_dir.is_dir():
|
26 |
+
root_logger.debug("Deleting working directory ...")
|
27 |
+
shutil.rmtree(work_dir)
|
28 |
+
root_logger.debug("Creating working directory ...")
|
29 |
+
work_dir.mkdir(parents=False, exist_ok=False)
|
30 |
+
|
31 |
+
|
32 |
+
def reset_log() -> None:
|
33 |
+
get_log_path().write_text("", "utf-8")
|
34 |
+
|
35 |
+
|
36 |
+
def load_models_to_cache(custom_device: torch.device = None) -> Dict:
|
37 |
cli_logger = get_app_logger()
|
38 |
cache = {}
|
39 |
|
40 |
conf_dir = get_conf_dir()
|
|
|
41 |
|
42 |
cli_logger.info("Initializing Transcriber...")
|
43 |
cache[CACHE_TRANSCRIBER] = Transcriber(conf_dir)
|
44 |
|
45 |
+
if custom_device is None:
|
46 |
+
custom_device = get_default_device()
|
47 |
+
|
48 |
cli_logger.info("Initializing Synthesizer...")
|
49 |
+
cache[CACHE_SYNTHESIZER] = Synthesizer(conf_dir, custom_device)
|
50 |
return cache
|
51 |
|
52 |
|
53 |
+
def synthesize_ipa(text_ipa: str, cache: Dict, *, max_decoder_steps: int = 5000, sigma: float = 1.0, denoiser_strength: float = 0.0005, seed: int = 0, silence_sentences: float = 0.4, silence_paragraphs: float = 1.0, loglevel: int = 2, custom_output: Optional[Path] = None):
|
54 |
+
if loglevel >= 1:
|
55 |
+
try_log_text(text_ipa, "text")
|
56 |
+
|
57 |
+
if custom_output is None:
|
58 |
+
custom_output = get_work_dir() / "output.wav"
|
59 |
+
|
60 |
+
|
61 |
+
output_path = synthesize_ipa_core(
|
62 |
+
text_ipa, cache[CACHE_SYNTHESIZER], custom_output,
|
63 |
+
max_decoder_steps=max_decoder_steps, sigma=sigma, denoiser_strength=denoiser_strength, seed=seed, silence_sentences=silence_sentences, silence_paragraphs=silence_paragraphs, loglevel=loglevel,
|
64 |
+
)
|
65 |
+
|
66 |
+
return output_path
|
67 |
+
|
68 |
+
|
69 |
def synthesize_english(text: str, cache: Dict, *, max_decoder_steps: int = 5000, sigma: float = 1.0, denoiser_strength: float = 0.0005, seed: int = 0, silence_sentences: float = 0.4, silence_paragraphs: float = 1.0, loglevel: int = 2, skip_normalization: bool = False, skip_sentence_separation: bool = False, custom_output: Optional[Path] = None) -> Path:
|
70 |
cli_logger = get_app_logger()
|
71 |
reset_log()
|
en_tts_gr/app.py
CHANGED
@@ -10,7 +10,7 @@ from typing import Dict
|
|
10 |
import gradio as gr
|
11 |
from scipy.io.wavfile import read
|
12 |
|
13 |
-
from en_tts_app import (get_log_path, get_work_dir, initialize_app,
|
14 |
load_models_to_cache, run_main, synthesize_english)
|
15 |
|
16 |
|
@@ -19,7 +19,7 @@ def run():
|
|
19 |
if exit_code > 0:
|
20 |
sys.exit(exit_code)
|
21 |
|
22 |
-
interface = build_interface()
|
23 |
interface.queue()
|
24 |
|
25 |
launch_method = partial(
|
@@ -35,7 +35,7 @@ def run():
|
|
35 |
sys.exit(exit_code)
|
36 |
|
37 |
|
38 |
-
def build_interface():
|
39 |
cache = load_models_to_cache()
|
40 |
|
41 |
fn = partial(synt, cache=cache)
|
@@ -172,10 +172,10 @@ def build_interface():
|
|
172 |
"When the sunlight strikes raindrops in the air, they act as a prism and form a rainbow.",
|
173 |
5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
|
174 |
],
|
175 |
-
[
|
176 |
-
|
177 |
-
|
178 |
-
],
|
179 |
],
|
180 |
fn=fn,
|
181 |
inputs=[
|
@@ -195,13 +195,13 @@ def build_interface():
|
|
195 |
dl_btn,
|
196 |
],
|
197 |
label="Examples",
|
198 |
-
cache_examples=
|
199 |
)
|
200 |
|
201 |
with gr.Tab("Info"):
|
202 |
with gr.Column():
|
203 |
gr.Markdown(
|
204 |
-
"""
|
205 |
### General information
|
206 |
|
207 |
- Speaker: Linda Johnson
|
@@ -228,7 +228,7 @@ def build_interface():
|
|
228 |
|
229 |
### Citation
|
230 |
|
231 |
-
Taubert, S. (2024). en-tts (Version
|
232 |
|
233 |
### Acknowledgments
|
234 |
|
@@ -240,12 +240,13 @@ def build_interface():
|
|
240 |
|
241 |
### App information
|
242 |
|
243 |
-
- Version:
|
244 |
- License: [MIT](https://github.com/stefantaubert/en-tts?tab=MIT-1-ov-file#readme)
|
245 |
- GitHub: [stefantaubert/en-tts](https://github.com/stefantaubert/en-tts)
|
246 |
"""
|
247 |
)
|
248 |
|
|
|
249 |
synt_btn.click(
|
250 |
fn=fn,
|
251 |
inputs=[
|
|
|
10 |
import gradio as gr
|
11 |
from scipy.io.wavfile import read
|
12 |
|
13 |
+
from en_tts_app import (APP_VERSION, get_log_path, get_work_dir, initialize_app,
|
14 |
load_models_to_cache, run_main, synthesize_english)
|
15 |
|
16 |
|
|
|
19 |
if exit_code > 0:
|
20 |
sys.exit(exit_code)
|
21 |
|
22 |
+
interface = build_interface(cache_examples=False)
|
23 |
interface.queue()
|
24 |
|
25 |
launch_method = partial(
|
|
|
35 |
sys.exit(exit_code)
|
36 |
|
37 |
|
38 |
+
def build_interface(cache_examples: bool = False):
|
39 |
cache = load_models_to_cache()
|
40 |
|
41 |
fn = partial(synt, cache=cache)
|
|
|
172 |
"When the sunlight strikes raindrops in the air, they act as a prism and form a rainbow.",
|
173 |
5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
|
174 |
],
|
175 |
+
# [
|
176 |
+
# "Please call Stella. Ask her to bring these things with her from the store: six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob.\n\nWe also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.",
|
177 |
+
# 5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
|
178 |
+
# ],
|
179 |
],
|
180 |
fn=fn,
|
181 |
inputs=[
|
|
|
195 |
dl_btn,
|
196 |
],
|
197 |
label="Examples",
|
198 |
+
cache_examples=cache_examples,
|
199 |
)
|
200 |
|
201 |
with gr.Tab("Info"):
|
202 |
with gr.Column():
|
203 |
gr.Markdown(
|
204 |
+
f"""
|
205 |
### General information
|
206 |
|
207 |
- Speaker: Linda Johnson
|
|
|
228 |
|
229 |
### Citation
|
230 |
|
231 |
+
Taubert, S. (2024). en-tts (Version {APP_VERSION}) [Computer software]. https://doi.org/10.5281/zenodo.11032264
|
232 |
|
233 |
### Acknowledgments
|
234 |
|
|
|
240 |
|
241 |
### App information
|
242 |
|
243 |
+
- Version: {APP_VERSION}
|
244 |
- License: [MIT](https://github.com/stefantaubert/en-tts?tab=MIT-1-ov-file#readme)
|
245 |
- GitHub: [stefantaubert/en-tts](https://github.com/stefantaubert/en-tts)
|
246 |
"""
|
247 |
)
|
248 |
|
249 |
+
# pylint: disable=E1101:no-member
|
250 |
synt_btn.click(
|
251 |
fn=fn,
|
252 |
inputs=[
|