import asyncio
import json
import logging
import os
from functools import lru_cache

import edge_tts
import gradio as gr

from tts_service.utils import cache_path
from tts_service.voices import voice_manager

log = logging.getLogger(__name__)

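# Import the RVC voice converter lazily and cache the single instance, so the
# heavy model code is only loaded the first time a conversion is requested.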
@lru_cache(maxsize=None)
def import_voice_converter():
    from rvc.infer.infer import VoiceConverter

    return VoiceConverter()

# TTS
async def run_tts_script(
    text: str,
    voice_name: str,
    rate: int = 0,
    progress=gr.Progress(),  # noqa: B008
) -> tuple[str, str]:
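    """Synthesize ``text`` with the configured edge-tts voice, then convert it
    with RVC, reusing cached audio where possible.

    Returns a status message and the path to the converted audio file.
    """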
    async def update_progress(pct, msg) -> None:
        log.debug("Progress: %.1f%%: %s", pct * 100, msg)
        progress(pct, msg)
        # Yield to the event loop so other queued tasks can run between updates.
        await asyncio.sleep(0)

    log.info("Synthesizing text (%s chars)", len(text))
    await update_progress(0, "Starting...")
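    # Stage 1: synthesize speech with edge-tts, skipping the call when a cached
    # file already exists for this voice/rate/text combination.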
    voice = voice_manager.voices[voice_name]
    format = "wav"
    text = text.strip()
    output_tts_path = cache_path(voice.tts, "", rate, text, extension=format)
    text_ptr = 0
    if not os.path.exists(output_tts_path):
        rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
        communicate = edge_tts.Communicate(text, voice.tts, rate=rates)
        with open(output_tts_path, "wb") as f:
            async for chunk in communicate.stream():
                chunk_type = chunk["type"]
                if chunk_type == "audio":
                    f.write(chunk["data"])
                elif chunk_type == "WordBoundary":
                    # Word boundaries report the text just spoken; use them to
                    # estimate how far synthesis has progressed.
                    chunk_text = chunk["text"]
                    text_index = text.find(chunk_text, text_ptr)
                    if text_index == -1:
                        log.warning("Extraneous text received from edge tts: %s", chunk_text)
                        continue
                    text_ptr = text_index + len(chunk_text)
                    pct_complete = text_ptr / len(text)
                    log.debug("%.1f%%: %s", pct_complete * 100, chunk)
                    await update_progress(pct_complete / 2, "Synthesizing...")
                else:
                    log.warning("Unknown chunk type: %s: %s", chunk_type, json.dumps(chunk))
    output_rvc_path = cache_path(voice.tts, voice.name, rate, text, extension=format)
    if not os.path.exists(output_rvc_path):
        infer_pipeline = import_voice_converter()
        await infer_pipeline.convert_audio(
            pitch=voice.pitch,
            filter_radius=voice.filter_radius,
            index_rate=voice.index_rate,
            volume_envelope=voice.rms_mix_rate,
            protect=voice.protect,
            hop_length=voice.hop_length,
            f0_method=voice.f0_method,
            audio_input_path=str(output_tts_path),
            audio_output_path=str(output_rvc_path),
            model_path=voice.model,
            index_path=voice.index,
            split_audio=True,
            f0_autotune=voice.autotune is not None,
            f0_autotune_strength=voice.autotune,
            clean_audio=voice.clean is not None,
            clean_strength=voice.clean,
            export_format=format.upper(),
            upscale_audio=voice.upscale,
            f0_file=None,
            embedder_model=voice.embedder_model,
            embedder_model_custom=None,
            sid=0,
            formant_shifting=None,
            formant_qfrency=None,
            formant_timbre=None,
            post_process=None,
            reverb=None,
            pitch_shift=None,
            limiter=None,
            gain=None,
            distortion=None,
            chorus=None,
            bitcrush=None,
            clipping=None,
            compressor=None,
            delay=None,
            sliders=None,
            callback=lambda pct: update_progress(0.5 + pct / 2, "Converting..."),
        )
    log.info("Successfully synthesized text (%s chars)", len(text))
    return "Text synthesized successfully.", str(output_rvc_path)

# Prerequisites