Spaces:

stefantaubert
/

en-tts

Running

App Files Files Community

en-tts / app_old.py

stefantaubert

update

a31f9ef 9 months ago

raw

history blame

2.14 kB

	from logging import getLogger

	import gradio as gr
	import numpy as np
	import numpy.typing as npt
	from en_tts import Synthesizer, Transcriber

	# Amplitude bounds used for floating-point waveforms (float32 and float64 share them).
	FLOAT32_64_MIN_WAV, FLOAT32_64_MAX_WAV = -1.0, 1.0
	# Representable range of 16-bit integer samples: -(2**15) .. 2**15 - 1.
	INT16_MIN, INT16_MAX = np.iinfo(np.int16).min, np.iinfo(np.int16).max
	# Representable range of 32-bit integer samples: -(2**31) .. 2**31 - 1.
	INT32_MIN, INT32_MAX = np.iinfo(np.int32).min, np.iinfo(np.int32).max

	# Module-level logger named after this module.
	# NOTE(review): no logging configuration is visible in this file; the INFO
	# messages below are only emitted if a handler/level is configured elsewhere.
	logger = getLogger(__name__)
	# Both objects are constructed once at import time and shared by every call
	# to synt(), so the construction cost is not paid per request.
	logger.info("Initializing transcriber...")
	transcriber = Transcriber()

	logger.info("Initializing synthesizer...")
	synthesizer = Synthesizer()


	def synt(text: str) -> "tuple[int, npt.NDArray[np.int16]]":
	    """Synthesize speech for *text* and return it in Gradio's numpy audio form.

	    The text is first transcribed to IPA with the module-level ``transcriber``,
	    the IPA string is synthesized with the module-level ``synthesizer``, and
	    the resulting waveform is converted to 16-bit integer samples.

	    Args:
	        text: The input text to be spoken.

	    Returns:
	        A ``(sampling_rate, samples)`` tuple where ``samples`` is the int16
	        waveform produced by ``convert_wav``.
	    """
	    # NOTE(review): presumably the synthesizer's output sampling rate -- confirm.
	    sampling_rate = 22050

	    logger.info("Transcribing...")
	    text_ipa = transcriber.transcribe_to_ipa(text)

	    logger.info("Synthesizing...")
	    audio = synthesizer.synthesize(text_ipa)
	    audio_int = convert_wav(audio, np.int16)
	    return sampling_rate, audio_int


	def get_max_value(dtype):
	    """Return the largest sample value used for waveforms of *dtype*.

	    Args:
	        dtype: ``np.int16``, ``np.int32``, ``np.float32`` or ``np.float64``.
	            Comparison is done with ``==`` so that ``np.dtype`` instances
	            (e.g. ``wav.dtype``) are accepted as well as the scalar types.

	    Returns:
	        ``INT16_MAX``, ``INT32_MAX`` or ``FLOAT32_64_MAX_WAV`` respectively.

	    Raises:
	        AssertionError: If *dtype* is not one of the supported types.
	    """
	    # see wavfile.write() max positive eg. on 16-bit PCM is 32767
	    if dtype == np.int16:
	        return INT16_MAX

	    if dtype == np.int32:
	        return INT32_MAX

	    if dtype in (np.float32, np.float64):
	        return FLOAT32_64_MAX_WAV

	    # Raised explicitly (instead of ``assert False``) so the check is not
	    # stripped under ``python -O``, which would silently return None.
	    raise AssertionError(f"Unsupported dtype: {dtype!r}")


	def get_min_value(dtype):
	    """Return the smallest sample value used for waveforms of *dtype*.

	    Args:
	        dtype: ``np.int16``, ``np.int32``, ``np.float32`` or ``np.float64``.
	            Comparison is done with ``==`` so that ``np.dtype`` instances
	            (e.g. ``wav.dtype``) are accepted as well as the scalar types.

	    Returns:
	        ``INT16_MIN``, ``INT32_MIN`` or ``FLOAT32_64_MIN_WAV`` respectively.

	    Raises:
	        AssertionError: If *dtype* is not one of the supported types.
	    """
	    if dtype == np.int16:
	        return INT16_MIN

	    if dtype == np.int32:
	        return INT32_MIN

	    if dtype in (np.float32, np.float64):
	        return FLOAT32_64_MIN_WAV

	    # Raised explicitly (instead of ``assert False``) so the check is not
	    # stripped under ``python -O``, which would silently return None.
	    raise AssertionError(f"Unsupported dtype: {dtype!r}")


	def convert_wav(wav: npt.NDArray[np.float64], to_dtype):
	    """Convert a waveform to *to_dtype*, rescaling its amplitude range.

	    The samples are divided by the magnitude of the source dtype's minimum
	    value and multiplied by the target dtype's maximum value.

	    If the wav is over-amplified and the target is a float dtype, the result
	    is over-amplified as well. For integer targets, values outside the
	    representable range are saturated to that range, because casting
	    out-of-range floats to integers would otherwise wrap around or yield
	    undefined values.

	    Args:
	        wav: The waveform to convert.
	        to_dtype: Target dtype; one of ``np.int16``, ``np.int32``,
	            ``np.float32`` or ``np.float64``.

	    Returns:
	        *wav* itself if it already has dtype *to_dtype*, otherwise a new
	        array of dtype *to_dtype*.

	    Raises:
	        AssertionError: If the source or target dtype is unsupported
	            (raised by ``get_min_value`` / ``get_max_value``).
	    """
	    if wav.dtype == to_dtype:
	        return wav

	    wav = wav / (-1 * get_min_value(wav.dtype)) * get_max_value(to_dtype)
	    if to_dtype in (np.int16, np.int32):
	        # the default seems to be np.fix instead of np.round on wav.astype()
	        wav = np.round(wav, 0)
	        # Saturate in float64: the int32 maximum is not exactly representable
	        # in float32, so clipping in the lower precision could still overflow.
	        target_range = np.iinfo(to_dtype)
	        wav = np.clip(
	            wav.astype(np.float64, copy=False),
	            target_range.min,
	            target_range.max,
	        )
	    return wav.astype(to_dtype)


	# Default sentence pre-filled into the text input.
	example_text = "When the sunlight strikes raindrops in the air, they act as a prism and form a rainbow."

	# Build the UI components first, then wire them to synt().
	text_input = gr.Textbox(example_text, label="Text")
	speech_output = gr.Audio(type="numpy", label="Speech", autoplay=True)

	iface = gr.Interface(
	    fn=synt,
	    inputs=[text_input],
	    outputs=[speech_output],
	)
	# Start the web app; this runs at module level, i.e. whenever the file is executed.
	iface.launch()