davidmezzetti
committed on
Commit
•
d51b59a
1
Parent(s):
db21534
Add model files
Browse files- README.md +30 -0
- config.yaml +92 -0
- model.onnx +3 -0
README.md
CHANGED
@@ -1,3 +1,33 @@
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: apache-2.0
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
tags:
|
3 |
+
- audio
|
4 |
+
- text-to-speech
|
5 |
+
- onnx
|
6 |
+
inference: false
|
7 |
+
language: en
|
8 |
+
datasets:
|
9 |
+
- ljspeech
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
+
|
13 |
+
# ESPnet VITS Text-to-Speech (TTS) Model for ONNX
|
14 |
+
|
15 |
+
This is the [espnet/kan-bayashi_ljspeech_vits](https://huggingface.co/espnet/kan-bayashi_ljspeech_vits) model, exported to ONNX using the [espnet_onnx](https://github.com/espnet/espnet_onnx) library.
|
16 |
+
|
17 |
+
## Usage with txtai
|
18 |
+
|
19 |
+
txtai has a built-in Text to Speech (TTS) pipeline that makes using this model easy.
|
20 |
+
|
21 |
+
```python
|
22 |
+
```
|
23 |
+
|
24 |
+
## Usage with ONNX
|
25 |
+
|
26 |
+
This model can also be run directly with ONNX, provided the input text is tokenized first. Tokenization can be done with [ttstokenizer](https://github.com/neuml/ttstokenizer).
|
27 |
+
|
28 |
+
```python
|
29 |
+
```
|
30 |
+
|
31 |
+
## How to export
|
32 |
+
|
33 |
+
More information on how to export ESPnet models to ONNX can be [found here](https://github.com/espnet/espnet_onnx#text2speech-inference).
|
config.yaml
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
normalize:
|
2 |
+
use_normalize: false
|
3 |
+
text_cleaner:
|
4 |
+
cleaner_types: tacotron
|
5 |
+
token:
|
6 |
+
list:
|
7 |
+
- <blank>
|
8 |
+
- <unk>
|
9 |
+
- AH0
|
10 |
+
- N
|
11 |
+
- T
|
12 |
+
- D
|
13 |
+
- S
|
14 |
+
- R
|
15 |
+
- L
|
16 |
+
- DH
|
17 |
+
- K
|
18 |
+
- Z
|
19 |
+
- IH1
|
20 |
+
- IH0
|
21 |
+
- M
|
22 |
+
- EH1
|
23 |
+
- W
|
24 |
+
- P
|
25 |
+
- AE1
|
26 |
+
- AH1
|
27 |
+
- V
|
28 |
+
- ER0
|
29 |
+
- F
|
30 |
+
- ','
|
31 |
+
- AA1
|
32 |
+
- B
|
33 |
+
- HH
|
34 |
+
- IY1
|
35 |
+
- UW1
|
36 |
+
- IY0
|
37 |
+
- AO1
|
38 |
+
- EY1
|
39 |
+
- AY1
|
40 |
+
- .
|
41 |
+
- OW1
|
42 |
+
- SH
|
43 |
+
- NG
|
44 |
+
- G
|
45 |
+
- ER1
|
46 |
+
- CH
|
47 |
+
- JH
|
48 |
+
- Y
|
49 |
+
- AW1
|
50 |
+
- TH
|
51 |
+
- UH1
|
52 |
+
- EH2
|
53 |
+
- OW0
|
54 |
+
- EY2
|
55 |
+
- AO0
|
56 |
+
- IH2
|
57 |
+
- AE2
|
58 |
+
- AY2
|
59 |
+
- AA2
|
60 |
+
- UW0
|
61 |
+
- EH0
|
62 |
+
- OY1
|
63 |
+
- EY0
|
64 |
+
- AO2
|
65 |
+
- ZH
|
66 |
+
- OW2
|
67 |
+
- AE0
|
68 |
+
- UW2
|
69 |
+
- AH2
|
70 |
+
- AY0
|
71 |
+
- IY2
|
72 |
+
- AW2
|
73 |
+
- AA0
|
74 |
+
- ''''
|
75 |
+
- ER2
|
76 |
+
- UH2
|
77 |
+
- '?'
|
78 |
+
- OY2
|
79 |
+
- '!'
|
80 |
+
- AW0
|
81 |
+
- UH0
|
82 |
+
- OY0
|
83 |
+
- ..
|
84 |
+
- <sos/eos>
|
85 |
+
tokenizer:
|
86 |
+
g2p_type: g2p_en_no_space
|
87 |
+
token_type: phn
|
88 |
+
tts_model:
|
89 |
+
model_path: espnet/kan-bayashi_ljspeech_vits/full/vits.onnx
|
90 |
+
model_type: VITS
|
91 |
+
vocoder:
|
92 |
+
vocoder_type: not_used
|
model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d18b3b99099fcad3bf5b5c02cbc98f262483a72699a9e995833fffbdc445053
|
3 |
+
size 137938668
|