en-tts / app_old.py
stefantaubert's picture
update
a31f9ef
raw
history blame
2.14 kB
from logging import getLogger
from typing import Tuple

import gradio as gr
import numpy as np
import numpy.typing as npt
from en_tts import Synthesizer, Transcriber
# Sample-value bounds used when converting WAV data between dtypes.

# Floating-point samples (float32 and float64) span [-1.0, 1.0].
FLOAT32_64_MIN_WAV, FLOAT32_64_MAX_WAV = -1.0, 1.0

# 16-bit integers: -(2**15) = -32768 .. 2**15 - 1 = 32767
INT16_MIN, INT16_MAX = np.iinfo(np.int16).min, np.iinfo(np.int16).max

# 32-bit integers: -(2**31) = -2147483648 .. 2**31 - 1 = 2147483647
INT32_MIN, INT32_MAX = np.iinfo(np.int32).min, np.iinfo(np.int32).max
# Module-level logger named after this module.
logger = getLogger(__name__)
# The transcriber and the synthesizer are created once, when the module is
# executed, and are then reused by every call to synt().
logger.info("Initializing transcriber...")
transcriber = Transcriber()
logger.info("Initializing synthesizer...")
synthesizer = Synthesizer()
def synt(text: str) -> Tuple[int, npt.NDArray[np.int16]]:
    """Turn ``text`` into speech samples for the audio output component.

    The text is first transcribed to IPA, the IPA transcription is then
    synthesized, and the resulting audio is converted to 16-bit integers.

    Args:
        text: The text to be spoken.

    Returns:
        A ``(sampling_rate, samples)`` tuple: the integer ``22050`` and the
        synthesized audio as an ``np.int16`` array.
    """
    logger.info("Transcribing...")
    text_ipa = transcriber.transcribe_to_ipa(text)
    logger.info("Synthesizing...")
    audio = synthesizer.synthesize(text_ipa)
    audio_int = convert_wav(audio, np.int16)
    # NOTE(review): 22050 is presumably the sampling rate of the synthesizer's
    # output -- confirm against en_tts before changing it.
    return 22050, audio_int
def get_max_value(dtype):
    """Return the largest sample value used for WAV data of ``dtype``.

    Supported dtypes are ``np.int16``, ``np.int32``, ``np.float32`` and
    ``np.float64``.

    Args:
        dtype: The dtype (or NumPy scalar type) of the WAV data.

    Returns:
        ``INT16_MAX``, ``INT32_MAX`` or ``FLOAT32_64_MAX_WAV``, depending on
        ``dtype``.

    Raises:
        AssertionError: If ``dtype`` is not one of the supported dtypes.
    """
    # see wavfile.write() max positive eg. on 16-bit PCM is 32767
    if dtype == np.int16:
        return INT16_MAX
    if dtype == np.int32:
        return INT32_MAX
    if dtype in (np.float32, np.float64):
        return FLOAT32_64_MAX_WAV
    # Raised explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would let an unsupported dtype fall
    # through and silently return None.
    raise AssertionError(f"unsupported dtype: {dtype!r}")
def get_min_value(dtype):
    """Return the smallest sample value used for WAV data of ``dtype``.

    Supported dtypes are ``np.int16``, ``np.int32``, ``np.float32`` and
    ``np.float64``.

    Args:
        dtype: The dtype (or NumPy scalar type) of the WAV data.

    Returns:
        ``INT16_MIN``, ``INT32_MIN`` or ``FLOAT32_64_MIN_WAV``, depending on
        ``dtype``.

    Raises:
        AssertionError: If ``dtype`` is not one of the supported dtypes.
    """
    if dtype == np.int16:
        return INT16_MIN
    if dtype == np.int32:
        return INT32_MIN
    if dtype in (np.float32, np.float64):
        return FLOAT32_64_MIN_WAV
    # Raised explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would let an unsupported dtype fall
    # through and silently return None.
    raise AssertionError(f"unsupported dtype: {dtype!r}")
def convert_wav(wav: npt.NDArray[np.float64], to_dtype):
    """Rescale ``wav`` to the value range of ``to_dtype`` and cast it.

    The samples are divided by the magnitude of the source dtype's minimum
    value and multiplied by the target dtype's maximum value. No clipping is
    applied: if the wav is over-amplified the result will also be
    over-amplified.

    Args:
        wav: The samples to convert.
        to_dtype: The target dtype (``np.int16``, ``np.int32``,
            ``np.float32`` or ``np.float64``).

    Returns:
        ``wav`` itself when it already has ``to_dtype``, otherwise a new
        array of dtype ``to_dtype``.
    """
    if wav.dtype == to_dtype:
        # Already in the requested format; nothing to do.
        return wav

    normalized = wav / (-1 * get_min_value(wav.dtype))
    scaled = normalized * get_max_value(to_dtype)

    if to_dtype in (np.int16, np.int32):
        # the default seems to be np.fix instead of np.round on astype(),
        # so round explicitly before casting to an integer dtype
        scaled = np.round(scaled, 0)

    return scaled.astype(to_dtype)
# Text passed to the input textbox as its initial value.
example_text = "When the sunlight strikes raindrops in the air, they act as a prism and form a rainbow."
# Gradio interface: one text input handled by synt(), whose
# (sampling_rate, samples) result is shown in an autoplaying audio component.
iface = gr.Interface(
fn=synt,
inputs=[gr.Textbox(example_text, label="Text")],
outputs=[gr.Audio(type="numpy", label="Speech", autoplay=True)],
)
# Launching happens at module level, i.e. as soon as this file is executed.
iface.launch()