davidmezzetti
committed on
Commit
•
62526e7
1
Parent(s):
7aa1d35
Add model files
Browse files
- README.md +30 -0
- config.yaml +97 -0
- model.onnx +3 -0
README.md
CHANGED
@@ -1,3 +1,33 @@
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: apache-2.0
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
tags:
|
3 |
+
- espnet
|
4 |
+
- audio
|
5 |
+
- text-to-speech
|
6 |
+
- onnx
|
7 |
+
language: en
|
8 |
+
datasets:
|
9 |
+
- ljspeech
|
10 |
license: apache-2.0
|
11 |
---
|
12 |
+
|
13 |
+
# ESPnet JETS Text-to-Speech (TTS) Model for ONNX
|
14 |
+
|
15 |
+
This is the [imdanboy/jets](https://huggingface.co/imdanboy/jets) model exported to ONNX using the [espnet_onnx](https://github.com/espnet/espnet_onnx) library.
|
16 |
+
|
17 |
+
## Usage with txtai
|
18 |
+
|
19 |
+
txtai has a built-in Text to Speech (TTS) pipeline that makes using this model easy.
|
20 |
+
|
21 |
+
```python
|
22 |
+
```
|
23 |
+
|
24 |
+
## Usage with ONNX
|
25 |
+
|
26 |
+
This model can also be run directly with ONNX, provided the input text is tokenized. Tokenization can be done with [ttstokenizer](https://github.com/neuml/ttstokenizer).
|
27 |
+
|
28 |
+
```python
|
29 |
+
```
|
30 |
+
|
31 |
+
## How to export
|
32 |
+
|
33 |
+
More information on how to export ESPnet models to ONNX can be [found here](https://github.com/espnet/espnet_onnx#text2speech-inference).
|
config.yaml
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
normalize:
|
2 |
+
eps: 1.0e-20
|
3 |
+
norm_means: true
|
4 |
+
norm_vars: true
|
5 |
+
stats_file: imdanboy/ljspeech_tts_train_jets_raw_phn_tacotron_g2p_en_no_space_train.total_count.ave/feats_stats.npz
|
6 |
+
type: gmvn
|
7 |
+
use_normalize: true
|
8 |
+
text_cleaner:
|
9 |
+
cleaner_types: tacotron
|
10 |
+
token:
|
11 |
+
list:
|
12 |
+
- <blank>
|
13 |
+
- <unk>
|
14 |
+
- AH0
|
15 |
+
- N
|
16 |
+
- T
|
17 |
+
- D
|
18 |
+
- S
|
19 |
+
- R
|
20 |
+
- L
|
21 |
+
- DH
|
22 |
+
- K
|
23 |
+
- Z
|
24 |
+
- IH1
|
25 |
+
- IH0
|
26 |
+
- M
|
27 |
+
- EH1
|
28 |
+
- W
|
29 |
+
- P
|
30 |
+
- AE1
|
31 |
+
- AH1
|
32 |
+
- V
|
33 |
+
- ER0
|
34 |
+
- F
|
35 |
+
- ','
|
36 |
+
- AA1
|
37 |
+
- B
|
38 |
+
- HH
|
39 |
+
- IY1
|
40 |
+
- UW1
|
41 |
+
- IY0
|
42 |
+
- AO1
|
43 |
+
- EY1
|
44 |
+
- AY1
|
45 |
+
- .
|
46 |
+
- OW1
|
47 |
+
- SH
|
48 |
+
- NG
|
49 |
+
- G
|
50 |
+
- ER1
|
51 |
+
- CH
|
52 |
+
- JH
|
53 |
+
- Y
|
54 |
+
- AW1
|
55 |
+
- TH
|
56 |
+
- UH1
|
57 |
+
- EH2
|
58 |
+
- OW0
|
59 |
+
- EY2
|
60 |
+
- AO0
|
61 |
+
- IH2
|
62 |
+
- AE2
|
63 |
+
- AY2
|
64 |
+
- AA2
|
65 |
+
- UW0
|
66 |
+
- EH0
|
67 |
+
- OY1
|
68 |
+
- EY0
|
69 |
+
- AO2
|
70 |
+
- ZH
|
71 |
+
- OW2
|
72 |
+
- AE0
|
73 |
+
- UW2
|
74 |
+
- AH2
|
75 |
+
- AY0
|
76 |
+
- IY2
|
77 |
+
- AW2
|
78 |
+
- AA0
|
79 |
+
- ''''
|
80 |
+
- ER2
|
81 |
+
- UH2
|
82 |
+
- '?'
|
83 |
+
- OY2
|
84 |
+
- '!'
|
85 |
+
- AW0
|
86 |
+
- UH0
|
87 |
+
- OY0
|
88 |
+
- ..
|
89 |
+
- <sos/eos>
|
90 |
+
tokenizer:
|
91 |
+
g2p_type: g2p_en_no_space
|
92 |
+
token_type: phn
|
93 |
+
tts_model:
|
94 |
+
model_path: imdanboy/ljspeech_tts_train_jets_raw_phn_tacotron_g2p_en_no_space_train.total_count.ave/full/jets.onnx
|
95 |
+
model_type: JETS
|
96 |
+
vocoder:
|
97 |
+
vocoder_type: not_used
|
model.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8222def33152135d49b4a27dff203c24c2c6c68a7fcaf2b649475ac2fe0c871
|
3 |
+
size 132537001
|