about page template added
Browse files
- .gitignore +1 -0
- infer_onnx.py +42 -3
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
venv
|
infer_onnx.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
import numpy as np
|
2 |
import onnxruntime
|
3 |
|
4 |
-
import utils
|
5 |
from text import text_to_sequence, sequence_to_text
|
6 |
import torch
|
7 |
import gradio as gr
|
@@ -9,10 +8,13 @@ import soundfile as sf
|
|
9 |
import tempfile
|
10 |
import yaml
|
11 |
import json
|
|
|
12 |
|
13 |
from huggingface_hub import hf_hub_download
|
14 |
from time import perf_counter
|
15 |
|
|
|
|
|
16 |
def intersperse(lst, item):
|
17 |
result = [item] * (len(lst) * 2 + 1)
|
18 |
result[1::2] = lst
|
@@ -190,6 +192,41 @@ For vocoders we use [Vocos](https://huggingface.co/BSC-LT/vocos-mel-22khz-cat) t
|
|
190 |
|
191 |
"""
|
192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
article = "Training and demo by The Language Technologies Unit from Barcelona Supercomputing Center."
|
194 |
|
195 |
vits2_inference = gr.Interface(
|
@@ -203,7 +240,7 @@ vits2_inference = gr.Interface(
|
|
203 |
gr.Dropdown(
|
204 |
choices=speakers,
|
205 |
label="Speaker id",
|
206 |
-
value=
|
207 |
info=f"Models are trained on 47 speakers. You can prompt the model using one of these speaker ids."
|
208 |
),
|
209 |
gr.Slider(
|
@@ -227,12 +264,14 @@ vits2_inference = gr.Interface(
|
|
227 |
outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath")]
|
228 |
)
|
229 |
|
|
|
|
|
230 |
demo = gr.Blocks()
|
231 |
|
232 |
with demo:
|
233 |
gr.Markdown(title)
|
234 |
gr.Markdown(description)
|
235 |
-
gr.TabbedInterface([vits2_inference], ["
|
236 |
gr.Markdown(article)
|
237 |
|
238 |
demo.queue(max_size=10)
|
|
|
1 |
import numpy as np
|
2 |
import onnxruntime
|
3 |
|
|
|
4 |
from text import text_to_sequence, sequence_to_text
|
5 |
import torch
|
6 |
import gradio as gr
|
|
|
8 |
import tempfile
|
9 |
import yaml
|
10 |
import json
|
11 |
+
import os
|
12 |
|
13 |
from huggingface_hub import hf_hub_download
|
14 |
from time import perf_counter
|
15 |
|
16 |
+
DEFAULT_SPEAKER_ID = os.environ.get("DEFAULT_SPEAKER_ID", default="caf_08106")
|
17 |
+
|
18 |
def intersperse(lst, item):
|
19 |
result = [item] * (len(lst) * 2 + 1)
|
20 |
result[1::2] = lst
|
|
|
192 |
|
193 |
"""
|
194 |
|
195 |
+
about = """
|
196 |
+
## About
|
197 |
+
The TTS test about.
|
198 |
+
|
199 |
+
## Samples
|
200 |
+
|
201 |
+
<table style="font-size:16px">
|
202 |
+
<col width="205">
|
203 |
+
<col width="205">
|
204 |
+
<thead>
|
205 |
+
<tr>
|
206 |
+
<td>Col1</td>
|
207 |
+
<td>Col2</td>
|
208 |
+
<td>Col3</td>
|
209 |
+
</tr>
|
210 |
+
</thead>
|
211 |
+
<tbody>
|
212 |
+
<tr>
|
213 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
|
214 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
|
215 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
|
216 |
+
</tr>
|
217 |
+
<tr>
|
218 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
|
219 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
|
220 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
|
221 |
+
</tr>
|
222 |
+
<tr>
|
223 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-3s.mp3"></audio></td>
|
224 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-6s.mp3"></audio></td>
|
225 |
+
<td><audio controls="" preload="none" style="width: 200px">audio not supported<source src="https://samplelib.com/lib/preview/mp3/sample-9s.mp3"></audio></td>
|
226 |
+
</tr>
|
227 |
+
</tbody></table>
|
228 |
+
"""
|
229 |
+
|
230 |
article = "Training and demo by The Language Technologies Unit from Barcelona Supercomputing Center."
|
231 |
|
232 |
vits2_inference = gr.Interface(
|
|
|
240 |
gr.Dropdown(
|
241 |
choices=speakers,
|
242 |
label="Speaker id",
|
243 |
+
value=DEFAULT_SPEAKER_ID,
|
244 |
info=f"Models are trained on 47 speakers. You can prompt the model using one of these speaker ids."
|
245 |
),
|
246 |
gr.Slider(
|
|
|
264 |
outputs=[gr.Audio(label="Matcha vocos", interactive=False, type="filepath")]
|
265 |
)
|
266 |
|
267 |
+
about_article = gr.Markdown(about)
|
268 |
+
|
269 |
demo = gr.Blocks()
|
270 |
|
271 |
with demo:
|
272 |
gr.Markdown(title)
|
273 |
gr.Markdown(description)
|
274 |
+
gr.TabbedInterface([vits2_inference, about_article], ["Demo", "About"])
|
275 |
gr.Markdown(article)
|
276 |
|
277 |
demo.queue(max_size=10)
|