This PR shows each language's full name (instead of its bare language code) directly in the language dropdown.

#57
Files changed (1) hide show
  1. app.py +17 -30
app.py CHANGED
@@ -284,7 +284,6 @@ def predict(
284
  repetition_penalty=7.0,
285
  temperature=0.85,
286
  )
287
-
288
  first_chunk = True
289
  for i, chunk in enumerate(chunks):
290
  if first_chunk:
@@ -300,13 +299,11 @@ def predict(
300
  #metrics_text += (
301
  # f"Time to generate audio: {round(inference_time*1000)} milliseconds\n"
302
  #)
303
-
304
  wav = torch.cat(wav_chunks, dim=0)
305
  print(wav.shape)
306
  real_time_factor = (time.time() - t0) / wav.shape[0] * 24000
307
  print(f"Real-time factor (RTF): {real_time_factor}")
308
  metrics_text += f"Real-time factor (RTF): {real_time_factor:.2f}\n"
309
-
310
  torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)
311
  """
312
 
@@ -411,29 +408,19 @@ def predict(
411
  title = "Coqui🐸 XTTS"
412
 
413
  description = """
414
-
415
  <br/>
416
-
417
  This demo is currently running **XTTS v2.0.3** <a href="https://huggingface.co/coqui/XTTS-v2">XTTS</a> is a multilingual text-to-speech and voice-cloning model. This demo features zero-shot voice cloning, however, you can fine-tune XTTS for better results. Leave a star 🌟 on Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
418
-
419
- <br/>
420
-
421
- Supported languages: Arabic: ar, Brazilian Portuguese: pt , Mandarin Chinese: zh-cn, Czech: cs, Dutch: nl, English: en, French: fr, German: de, Italian: it, Polish: pl, Russian: ru, Spanish: es, Turkish: tr, Japanese: ja, Korean: ko, Hungarian: hu, Hindi: hi
422
-
423
  <br/>
424
  """
425
 
426
  links = """
427
  <img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=0d00920c-8cc9-4bf3-90f2-a615797e5f59" />
428
-
429
  | | |
430
  | ------------------------------- | --------------------------------------- |
431
  | ๐Ÿธ๐Ÿ’ฌ **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
432
  | 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
433
  | 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
434
  | 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
435
-
436
-
437
  """
438
 
439
  article = """
@@ -626,23 +613,23 @@ with gr.Blocks(analytics_enabled=False) as demo:
626
  label="Language",
627
  info="Select an output language for the synthesised speech",
628
  choices=[
629
- "en",
630
- "es",
631
- "fr",
632
- "de",
633
- "it",
634
- "pt",
635
- "pl",
636
- "tr",
637
- "ru",
638
- "nl",
639
- "cs",
640
- "ar",
641
- "zh-cn",
642
- "ja",
643
- "ko",
644
- "hu",
645
- "hi"
646
  ],
647
  max_choices=1,
648
  value="en",
 
284
  repetition_penalty=7.0,
285
  temperature=0.85,
286
  )
 
287
  first_chunk = True
288
  for i, chunk in enumerate(chunks):
289
  if first_chunk:
 
299
  #metrics_text += (
300
  # f"Time to generate audio: {round(inference_time*1000)} milliseconds\n"
301
  #)
 
302
  wav = torch.cat(wav_chunks, dim=0)
303
  print(wav.shape)
304
  real_time_factor = (time.time() - t0) / wav.shape[0] * 24000
305
  print(f"Real-time factor (RTF): {real_time_factor}")
306
  metrics_text += f"Real-time factor (RTF): {real_time_factor:.2f}\n"
 
307
  torchaudio.save("output.wav", wav.squeeze().unsqueeze(0).cpu(), 24000)
308
  """
309
 
 
408
  title = "Coqui🐸 XTTS"
409
 
410
  description = """
 
411
  <br/>
 
412
  This demo is currently running **XTTS v2.0.3** <a href="https://huggingface.co/coqui/XTTS-v2">XTTS</a> is a multilingual text-to-speech and voice-cloning model. This demo features zero-shot voice cloning, however, you can fine-tune XTTS for better results. Leave a star 🌟 on Github <a href="https://github.com/coqui-ai/TTS">🐸TTS</a>, where our open-source inference and training code lives.
 
 
 
 
 
413
  <br/>
414
  """
415
 
416
  links = """
417
  <img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=0d00920c-8cc9-4bf3-90f2-a615797e5f59" />
 
418
  | | |
419
  | ------------------------------- | --------------------------------------- |
420
  | ๐Ÿธ๐Ÿ’ฌ **CoquiTTS** | <a style="display:inline-block" href='https://github.com/coqui-ai/TTS'><img src='https://img.shields.io/github/stars/coqui-ai/TTS?style=social' /></a>|
421
  | 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/)
422
  | 👩‍💻 **Questions** | [GitHub Discussions](https://github.com/coqui-ai/TTS/discussions) |
423
  | 🗯 **Community** | [![Dicord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) |
 
 
424
  """
425
 
426
  article = """
 
613
  label="Language",
614
  info="Select an output language for the synthesised speech",
615
  choices=[
616
+ ["Arabic", "ar"],
617
+ ["Brazilian Portuguese", "pt"],
618
+ ["Mandarin Chinese", "zh-cn"],
619
+ ["Czech", "cs"],
620
+ ["Dutch", "nl"],
621
+ ["English", "en"],
622
+ ["French", "fr"],
623
+ ["German", "de"],
624
+ ["Italian", "it"],
625
+ ["Polish", "pl"],
626
+ ["Russian", "ru"],
627
+ ["Spanish", "es"],
628
+ ["Turkish", "tr"],
629
+ ["Japanese", "ja"],
630
+ ["Korean", "ko"],
631
+ ["Hungarian", "hu"],
632
+ ["Hindi", "hi"]
633
  ],
634
  max_choices=1,
635
  value="en",