Spaces:

jlopez00
/

tts-service

Runtime error

App Files Files Community

jlopez00 committed 18 days ago

Commit

1c6f49f

•

1 Parent(s): 2c01ee6

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

tts_service/tts.py +46 -52

tts_service/tts.py CHANGED Viewed

@@ -1,13 +1,17 @@
 import asyncio
 import json
 import logging
 import os
 from functools import lru_cache
 import edge_tts
 import gradio as gr
-from tts_service.utils import cache_path
 from tts_service.voices import voice_manager
 log = logging.getLogger(__name__)
@@ -27,19 +31,17 @@ async def run_tts_script(
     rate: int = 0,
     progress=gr.Progress(),  # noqa: B008
 ) -> tuple[str, str]:
-    async def update_progress(pct, msg) -> None:
         log.debug("Progress: %.1f%%: %s", pct * 100, msg)
         progress(pct, msg)
-        await asyncio.sleep(0)
     log.info("Synthesizing text (%s chars)", len(text))
-    await update_progress(0, "Starting...")
     voice = voice_manager.voices[voice_name]
-    format = "wav"
     text = text.strip()
-    output_tts_path = cache_path(voice.tts, "", rate, text, extension=format)
     text_ptr = 0
     if not os.path.exists(output_tts_path):
         rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
@@ -58,56 +60,48 @@ async def run_tts_script(
                     text_ptr = text_index + len(chunk_text)
                     pct_complete = text_ptr / len(text)
                     log.debug("%.1f%%: %s", pct_complete * 100, chunk)
-                    await update_progress(pct_complete / 2, "Synthesizing...")
                 else:
                     log.warning("Unknown chunk type: %s: %s", chunk_type, json.dumps(chunk))
-    output_rvc_path = cache_path(voice.tts, voice.name, rate, text, extension=format)
     if not os.path.exists(output_rvc_path):
-        infer_pipeline = import_voice_converter()
-        await infer_pipeline.convert_audio(
-            pitch=voice.pitch,
-            filter_radius=voice.filter_radius,
-            index_rate=voice.index_rate,
-            volume_envelope=voice.rms_mix_rate,
-            protect=voice.protect,
-            hop_length=voice.hop_length,
-            f0_method=voice.f0_method,
-            audio_input_path=str(output_tts_path),
-            audio_output_path=str(output_rvc_path),
-            model_path=voice.model,
-            index_path=voice.index,
-            split_audio=True,
-            f0_autotune=voice.autotune is not None,
-            f0_autotune_strength=voice.autotune,
-            clean_audio=voice.clean is not None,
-            clean_strength=voice.clean,
-            export_format=format.upper(),
-            upscale_audio=voice.upscale,
-            f0_file=None,
-            embedder_model=voice.embedder_model,
-            embedder_model_custom=None,
-            sid=0,
-            formant_shifting=None,
-            formant_qfrency=None,
-            formant_timbre=None,
-            post_process=None,
-            reverb=None,
-            pitch_shift=None,
-            limiter=None,
-            gain=None,
-            distortion=None,
-            chorus=None,
-            bitcrush=None,
-            clipping=None,
-            compressor=None,
-            delay=None,
-            sliders=None,
-            callback=lambda pct: update_progress(0.5 + pct / 2, "Converting..."),
-        )
-    log.info("Successfully synthesized text (%s chars)", len(text))
-    return "Text synthesized successfully.", str(output_rvc_path)
 # Prerequisites

 import asyncio
 import json
 import logging
+import math
 import os
+import time
 from functools import lru_cache
 import edge_tts
 import gradio as gr
+import httpx
+import soundfile as sf
+from tts_service.utils import cache_path, env_str
 from tts_service.voices import voice_manager
 log = logging.getLogger(__name__)
     rate: int = 0,
     progress=gr.Progress(),  # noqa: B008
 ) -> tuple[str, str]:
+    def update_progress(pct, msg) -> None:
         log.debug("Progress: %.1f%%: %s", pct * 100, msg)
         progress(pct, msg)
     log.info("Synthesizing text (%s chars)", len(text))
+    update_progress(0, "Starting...")
     voice = voice_manager.voices[voice_name]
     text = text.strip()
+    output_tts_path = cache_path(voice.tts, "", rate, text, extension="mp3")
     text_ptr = 0
     if not os.path.exists(output_tts_path):
         rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
                     text_ptr = text_index + len(chunk_text)
                     pct_complete = text_ptr / len(text)
                     log.debug("%.1f%%: %s", pct_complete * 100, chunk)
+                    update_progress(pct_complete / 2, "Synthesizing...")
                 else:
                     log.warning("Unknown chunk type: %s: %s", chunk_type, json.dumps(chunk))
+    audio_duration = sf.info(output_tts_path).duration
+    expected_processing_time = audio_duration / 8 + 10  # 10x real-time on nvidia t4
+    log.info(f"Synthesized {audio_duration:,.0f}s, expected processing time: {expected_processing_time:,.0f}s")
+    output_rvc_path = cache_path(voice.tts, voice.name, rate, text, extension="mp3")
     if not os.path.exists(output_rvc_path):
+        ts0 = time.time()
+        last_check = 0.0
+        timeout = httpx.Timeout(5, read=15.0)
+        endpoint_url = env_str("RVC_ENDPOINT")
+        async with httpx.AsyncClient(timeout=timeout) as client:
+            response = await client.post(f"{endpoint_url}/v1/rvc", content=output_tts_path.read_bytes())
+            response.raise_for_status()
+            data = response.json()
+            log.info("Submitted for conversion: %s", data)
+            result_url = data["urls"]["result"]
+            while True:
+                elapsed = time.time() - ts0
+                proportion = elapsed / expected_processing_time
+                pct_complete = 0.5 + math.tanh(proportion) / 2
+                update_progress(pct_complete, "Processing...")
+                if elapsed > 0.8 * expected_processing_time and elapsed - last_check > 10:
+                    last_check = elapsed
+                    response = await client.get(result_url)
+                    content_type = response.headers.get("Content-Type")
+                    processed_bytes = await response.aread()
+                    log.info(f"Checking status: %s (%s) {len(processed_bytes):,} bytes", response.status_code, content_type)
+                    if response.status_code == 200 and content_type == "audio/mpeg":
+                        output_rvc_path.write_bytes(processed_bytes)
+                        break
+                    elif response.status_code != 404:
+                        response.raise_for_status()
+                await asyncio.sleep(0.1)
+        log.info("Successfully converted text (%s chars) -> %s", len(text), output_rvc_path)
+    else:
+        log.info("Already converted: %s", output_rvc_path)
+    return f"{audio_duration:,.0f}s of audio successfully synthesized.", str(output_rvc_path)
 # Prerequisites