Spaces:
Running
on
T4
Running
on
T4
This PR directly displays the language in the dropdown
#57
by
Fabrice-TIERCELIN
- opened
app.py
CHANGED
@@ -284,7 +284,6 @@ def predict(
|
|
284 |
repetition_penalty=7.0,
|
285 |
temperature=0.85,
|
286 |
)
|
287 |
-
|
288 |
first_chunk = True
|
289 |
for i, chunk in enumerate(chunks):
|
290 |
if first_chunk:
|
@@ -300,13 +299,11 @@ def predict(
|
|
300 |
#metrics_text += (
|
301 |
# f"Time to generate audio: {round(inference_time*1000)} milliseconds\n"
|
302 |
#)
|
303 |
-
|
304 |
wav = torch.cat(wav_chunks, dim=0)
|
305 |
print(wav.shape)
|
306 |
real_time_factor = (time.time() - t0) / wav.shape[0] * 24000
|
307 |
print(f"Real-time factor (RTF): {real_time_factor}")
|
308 |
metrics_text += f"Real-time factor (RTF): {real_time_factor:.2f}\n"
|
309 |
-
|
310 |
torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)
|
311 |
"""
|
312 |
|
@@ -411,29 +408,19 @@ def predict(
|
|
411 |
title = "Coqui🐸 XTTS"
|
412 |
|
413 |
description = """
|
414 |
-
|
415 |
<br/>
|
416 |
-
|
417 |
This demo is currently running **XTTS v2.0.3** <a href="https://huggingface.co/coqui/XTTS-v2">XTTS</a> is a multilingual text-to-speech and voice-cloning model. This demo features zero-shot voice cloning, however, you can fine-tune XTTS for better results. Leave a star 🌟 on Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
|
418 |
-
|
419 |
-
<br/>
|
420 |
-
|
421 |
-
Supported languages: Arabic: ar, Brazilian Portuguese: pt , Mandarin Chinese: zh-cn, Czech: cs, Dutch: nl, English: en, French: fr, German: de, Italian: it, Polish: pl, Russian: ru, Spanish: es, Turkish: tr, Japanese: ja, Korean: ko, Hungarian: hu, Hindi: hi
|
422 |
-
|
423 |
<br/>
|
424 |
"""
|
425 |
|
426 |
links = """
|
427 |
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=0d00920c-8cc9-4bf3-90f2-a615797e5f59" />
|
428 |
-
|
429 |
| | |
|
430 |
| ------------------------------- | --------------------------------------- |
|
431 |
| 🐸💬 **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
|
432 |
| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
|
433 |
| 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
|
434 |
| 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
|
435 |
-
|
436 |
-
|
437 |
"""
|
438 |
|
439 |
article = """
|
@@ -626,23 +613,23 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
626 |
label="Language",
|
627 |
info="Select an output language for the synthesised speech",
|
628 |
choices=[
|
629 |
-
"
|
630 |
-
"
|
631 |
-
"
|
632 |
-
"
|
633 |
-
"
|
634 |
-
"
|
635 |
-
"
|
636 |
-
"
|
637 |
-
"
|
638 |
-
"
|
639 |
-
"
|
640 |
-
"
|
641 |
-
"
|
642 |
-
"ja",
|
643 |
-
"ko",
|
644 |
-
"hu",
|
645 |
-
"hi"
|
646 |
],
|
647 |
max_choices=1,
|
648 |
value="en",
|
|
|
284 |
repetition_penalty=7.0,
|
285 |
temperature=0.85,
|
286 |
)
|
|
|
287 |
first_chunk = True
|
288 |
for i, chunk in enumerate(chunks):
|
289 |
if first_chunk:
|
|
|
299 |
#metrics_text += (
|
300 |
# f"Time to generate audio: {round(inference_time*1000)} milliseconds\n"
|
301 |
#)
|
|
|
302 |
wav = torch.cat(wav_chunks, dim=0)
|
303 |
print(wav.shape)
|
304 |
real_time_factor = (time.time() - t0) / wav.shape[0] * 24000
|
305 |
print(f"Real-time factor (RTF): {real_time_factor}")
|
306 |
metrics_text += f"Real-time factor (RTF): {real_time_factor:.2f}\n"
|
|
|
307 |
torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)
|
308 |
"""
|
309 |
|
|
|
408 |
title = "Coqui🐸 XTTS"
|
409 |
|
410 |
description = """
|
|
|
411 |
<br/>
|
|
|
412 |
This demo is currently running **XTTS v2.0.3** <a href="https://huggingface.co/coqui/XTTS-v2">XTTS</a> is a multilingual text-to-speech and voice-cloning model. This demo features zero-shot voice cloning, however, you can fine-tune XTTS for better results. Leave a star 🌟 on Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
|
|
|
|
|
|
|
|
|
|
|
413 |
<br/>
|
414 |
"""
|
415 |
|
416 |
links = """
|
417 |
<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=0d00920c-8cc9-4bf3-90f2-a615797e5f59" />
|
|
|
418 |
| | |
|
419 |
| ------------------------------- | --------------------------------------- |
|
420 |
| 🐸💬 **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
|
421 |
| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
|
422 |
| 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
|
423 |
| 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
|
|
|
|
|
424 |
"""
|
425 |
|
426 |
article = """
|
|
|
613 |
label="Language",
|
614 |
info="Select an output language for the synthesised speech",
|
615 |
choices=[
|
616 |
+
["Arabic", "ar"],
|
617 |
+
["Brazilian Portuguese", "pt"],
|
618 |
+
["Mandarin Chinese", "zh-cn"],
|
619 |
+
["Czech", "cs"],
|
620 |
+
["Dutch", "nl"],
|
621 |
+
["English", "en"],
|
622 |
+
["French", "fr"],
|
623 |
+
["German", "de"],
|
624 |
+
["Italian", "it"],
|
625 |
+
["Polish", "pl"],
|
626 |
+
["Russian", "ru"],
|
627 |
+
["Spanish", "es"],
|
628 |
+
["Turkish", "tr"],
|
629 |
+
["Japanese", "ja"],
|
630 |
+
["Korean", "ko"],
|
631 |
+
["Hungarian", "hu"],
|
632 |
+
["Hindi", "hi"]
|
633 |
],
|
634 |
max_choices=1,
|
635 |
value="en",
|