Spaces:

projecte-aina
/

matxa-alvocat-tts-ca

Running

App Files Files Community

Baybars committed on Apr 16, 2024

Commit

fc52d83

1 Parent(s): 6de7952

about page template added

Browse files

Files changed (2) hide show

.gitignore +1 -0
infer_onnx.py +42 -3

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ venv

infer_onnx.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import numpy as np
 import onnxruntime
-import utils
 from text import text_to_sequence, sequence_to_text
 import torch
 import gradio as gr
@@ -9,10 +8,13 @@ import soundfile as sf
 import tempfile
 import yaml
 import json
 from huggingface_hub import hf_hub_download
 from time import perf_counter
 def intersperse(lst, item):
     result = [item] * (len(lst) * 2 + 1)
     result[1::2] = lst
@@ -190,6 +192,41 @@ For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) t
 """
 article = "Training and demo by The Language Technologies Unit from Barcelona Supercomputing Center."
 vits2_inference = gr.Interface(
@@ -203,7 +240,7 @@ vits2_inference = gr.Interface(
         gr.Dropdown(
             choices=speakers,
             label="Speaker id",
-            value='caf_08106',
             info=f"Models are trained on 47 speakers. You can prompt the model using one of these speaker ids."
         ),
         gr.Slider(
@@ -227,12 +264,14 @@ vits2_inference = gr.Interface(
     outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath")]
 )
 demo = gr.Blocks()
 with demo:
     gr.Markdown(title)
     gr.Markdown(description)
-    gr.TabbedInterface([vits2_inference], ["Multispeaker"])
     gr.Markdown(article)
 demo.queue(max_size=10)

 import numpy as np
 import onnxruntime
 from text import text_to_sequence, sequence_to_text
 import torch
 import gradio as gr
 import tempfile
 import yaml
 import json
+import os
 from huggingface_hub import hf_hub_download
 from time import perf_counter
+DEFAULT_SPEAKER_ID = os.environ.get("DEFAULT_SPEAKER_ID", default="caf_08106")
 def intersperse(lst, item):
     result = [item] * (len(lst) * 2 + 1)
     result[1::2] = lst
 """
+about = """
+## 📄 About
+The TTS test about.
+## Samples
+<table style="font-size:16px">
+  <col width="205">
+  <col width="205">
+<thead>
+<tr>
+  <td>Col1</td>
+  <td>Col2</td>
+  <td>Col3</td>
+</tr>
+</thead>
+<tbody>
+<tr>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
+</tr>
+<tr>
+   <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
+</tr>
+<tr>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
+  <td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
+</tr>
+</tbody></table>
+"""
 article = "Training and demo by The Language Technologies Unit from Barcelona Supercomputing Center."
 vits2_inference = gr.Interface(
         gr.Dropdown(
             choices=speakers,
             label="Speaker id",
+            value=DEFAULT_SPEAKER_ID,
             info=f"Models are trained on 47 speakers. You can prompt the model using one of these speaker ids."
         ),
         gr.Slider(
     outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath")]
 )
+about_article = gr.Markdown(about)
 demo = gr.Blocks()
 with demo:
     gr.Markdown(title)
     gr.Markdown(description)
+    gr.TabbedInterface([vits2_inference, about_article], ["Demo", "About"])
     gr.Markdown(article)
 demo.queue(max_size=10)