Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- tts_service/tts.py +46 -52
tts_service/tts.py
CHANGED
@@ -1,13 +1,17 @@
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import logging
|
|
|
4 |
import os
|
|
|
5 |
from functools import lru_cache
|
6 |
|
7 |
import edge_tts
|
8 |
import gradio as gr
|
|
|
|
|
9 |
|
10 |
-
from tts_service.utils import cache_path
|
11 |
from tts_service.voices import voice_manager
|
12 |
|
13 |
log = logging.getLogger(__name__)
|
@@ -27,19 +31,17 @@ async def run_tts_script(
|
|
27 |
rate: int = 0,
|
28 |
progress=gr.Progress(), # noqa: B008
|
29 |
) -> tuple[str, str]:
|
30 |
-
|
31 |
log.debug("Progress: %.1f%%: %s", pct * 100, msg)
|
32 |
progress(pct, msg)
|
33 |
-
await asyncio.sleep(0)
|
34 |
|
35 |
log.info("Synthesizing text (%s chars)", len(text))
|
36 |
|
37 |
-
|
38 |
voice = voice_manager.voices[voice_name]
|
39 |
-
format = "wav"
|
40 |
|
41 |
text = text.strip()
|
42 |
-
output_tts_path = cache_path(voice.tts, "", rate, text, extension=
|
43 |
text_ptr = 0
|
44 |
if not os.path.exists(output_tts_path):
|
45 |
rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
|
@@ -58,56 +60,48 @@ async def run_tts_script(
|
|
58 |
text_ptr = text_index + len(chunk_text)
|
59 |
pct_complete = text_ptr / len(text)
|
60 |
log.debug("%.1f%%: %s", pct_complete * 100, chunk)
|
61 |
-
|
62 |
else:
|
63 |
log.warning("Unknown chunk type: %s: %s", chunk_type, json.dumps(chunk))
|
64 |
|
65 |
-
|
|
|
|
|
|
|
|
|
66 |
if not os.path.exists(output_rvc_path):
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
distortion=None,
|
100 |
-
chorus=None,
|
101 |
-
bitcrush=None,
|
102 |
-
clipping=None,
|
103 |
-
compressor=None,
|
104 |
-
delay=None,
|
105 |
-
sliders=None,
|
106 |
-
callback=lambda pct: update_progress(0.5 + pct / 2, "Converting..."),
|
107 |
-
)
|
108 |
-
|
109 |
-
log.info("Successfully synthesized text (%s chars)", len(text))
|
110 |
-
return "Text synthesized successfully.", str(output_rvc_path)
|
111 |
|
112 |
|
113 |
# Prerequisites
|
|
|
1 |
import asyncio
|
2 |
import json
|
3 |
import logging
|
4 |
+
import math
|
5 |
import os
|
6 |
+
import time
|
7 |
from functools import lru_cache
|
8 |
|
9 |
import edge_tts
|
10 |
import gradio as gr
|
11 |
+
import httpx
|
12 |
+
import soundfile as sf
|
13 |
|
14 |
+
from tts_service.utils import cache_path, env_str
|
15 |
from tts_service.voices import voice_manager
|
16 |
|
17 |
log = logging.getLogger(__name__)
|
|
|
31 |
rate: int = 0,
|
32 |
progress=gr.Progress(), # noqa: B008
|
33 |
) -> tuple[str, str]:
|
34 |
+
def update_progress(pct, msg) -> None:
|
35 |
log.debug("Progress: %.1f%%: %s", pct * 100, msg)
|
36 |
progress(pct, msg)
|
|
|
37 |
|
38 |
log.info("Synthesizing text (%s chars)", len(text))
|
39 |
|
40 |
+
update_progress(0, "Starting...")
|
41 |
voice = voice_manager.voices[voice_name]
|
|
|
42 |
|
43 |
text = text.strip()
|
44 |
+
output_tts_path = cache_path(voice.tts, "", rate, text, extension="mp3")
|
45 |
text_ptr = 0
|
46 |
if not os.path.exists(output_tts_path):
|
47 |
rates = f"+{rate}%" if rate >= 0 else f"{rate}%"
|
|
|
60 |
text_ptr = text_index + len(chunk_text)
|
61 |
pct_complete = text_ptr / len(text)
|
62 |
log.debug("%.1f%%: %s", pct_complete * 100, chunk)
|
63 |
+
update_progress(pct_complete / 2, "Synthesizing...")
|
64 |
else:
|
65 |
log.warning("Unknown chunk type: %s: %s", chunk_type, json.dumps(chunk))
|
66 |
|
67 |
+
audio_duration = sf.info(output_tts_path).duration
|
68 |
+
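expected_processing_time = audio_duration / 8 + 10 # NOTE(review): estimate is duration/8 plus a fixed 10 s; the original note said "10x real-time on nvidia t4", which does not match the divisor of 8 — confirm the intended figure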
|
69 |
+
log.info(f"Synthesized {audio_duration:,.0f}s, expected processing time: {expected_processing_time:,.0f}s")
|
70 |
+
|
71 |
+
output_rvc_path = cache_path(voice.tts, voice.name, rate, text, extension="mp3")
|
72 |
if not os.path.exists(output_rvc_path):
|
73 |
+
ts0 = time.time()
|
74 |
+
last_check = 0.0
|
75 |
+
timeout = httpx.Timeout(5, read=15.0)
|
76 |
+
endpoint_url = env_str("RVC_ENDPOINT")
|
77 |
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
78 |
+
response = await client.post(f"{endpoint_url}/v1/rvc", content=output_tts_path.read_bytes())
|
79 |
+
response.raise_for_status()
|
80 |
+
data = response.json()
|
81 |
+
log.info("Submitted for conversion: %s", data)
|
82 |
+
result_url = data["urls"]["result"]
|
83 |
+
while True:
|
84 |
+
elapsed = time.time() - ts0
|
85 |
+
proportion = elapsed / expected_processing_time
|
86 |
+
pct_complete = 0.5 + math.tanh(proportion) / 2
|
87 |
+
update_progress(pct_complete, "Processing...")
|
88 |
+
if elapsed > 0.8 * expected_processing_time and elapsed - last_check > 10:
|
89 |
+
last_check = elapsed
|
90 |
+
response = await client.get(result_url)
|
91 |
+
content_type = response.headers.get("Content-Type")
|
92 |
+
processed_bytes = await response.aread()
|
93 |
+
log.info(f"Checking status: %s (%s) {len(processed_bytes):,} bytes", response.status_code, content_type)
|
94 |
+
if response.status_code == 200 and content_type == "audio/mpeg":
|
95 |
+
output_rvc_path.write_bytes(processed_bytes)
|
96 |
+
break
|
97 |
+
elif response.status_code != 404:
|
98 |
+
response.raise_for_status()
|
99 |
+
await asyncio.sleep(0.1)
|
100 |
+
log.info("Successfully converted text (%s chars) -> %s", len(text), output_rvc_path)
|
101 |
+
else:
|
102 |
+
log.info("Already converted: %s", output_rvc_path)
|
103 |
+
|
104 |
+
return f"{audio_duration:,.0f}s of audio successfully synthesized.", str(output_rvc_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
|
107 |
# Prerequisites
|