Update hyperparams and citations
Browse files
- README.md +29 -3
- hyperparams.yaml +5 -4
README.md
CHANGED
@@ -27,9 +27,9 @@ SpeechBrain. For a better experience, we encourage you to learn more about
|
|
27 |
[SpeechBrain](https://speechbrain.github.io).
|
28 |
The performance of the model is as follows:
|
29 |
|
30 |
-
| Release
|
31 |
-
|
32 |
-
| 09-05-21 |
|
33 |
|
34 |
## Pipeline description
|
35 |
|
@@ -105,4 +105,30 @@ Please, cite SpeechBrain if you use it for your research or business.
|
|
105 |
primaryClass={eess.AS},
|
106 |
note={arXiv:2106.04624}
|
107 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
```
|
|
|
27 |
[SpeechBrain](https://speechbrain.github.io).
|
28 |
The performance of the model is as follows:
|
29 |
|
30 |
+
| Release | eval clean CER | eval other CER | GPUs |
|
31 |
+
| :------: | :------------: | :------------: | :---------: |
|
32 |
+
| 09-05-21 | 7.48% | 8.38% | 6xA100 80GB |
|
33 |
|
34 |
## Pipeline description
|
35 |
|
|
|
105 |
primaryClass={eess.AS},
|
106 |
note={arXiv:2106.04624}
|
107 |
}
|
108 |
+
```
|
109 |
+
|
110 |
+
# Citing the model
|
111 |
+
```bibtex
|
112 |
+
@misc{returnzero,
|
113 |
+
title = {ReturnZero Conformer Korean ASR model},
|
114 |
+
author = {Dongwon Kim and Dongwoo Kim and Roh Jeongkyu},
|
115 |
+
year = {2021},
|
116 |
+
howpublished = {\url{https://huggingface.co/ddwkim/asr-conformer-transformerlm-ksponspeech}},
|
117 |
+
}
|
118 |
+
```
|
119 |
+
|
120 |
+
# Citing the KsponSpeech dataset
|
121 |
+
```bibtex
|
122 |
+
@Article{app10196936,
|
123 |
+
AUTHOR = {Bang, Jeong-Uk and Yun, Seung and Kim, Seung-Hi and Choi, Mu-Yeol and Lee, Min-Kyu and Kim, Yeo-Jeong and Kim, Dong-Hyun and Park, Jun and Lee, Young-Jik and Kim, Sang-Hun},
|
124 |
+
TITLE = {KsponSpeech: Korean Spontaneous Speech Corpus for Automatic Speech Recognition},
|
125 |
+
JOURNAL = {Applied Sciences},
|
126 |
+
VOLUME = {10},
|
127 |
+
YEAR = {2020},
|
128 |
+
NUMBER = {19},
|
129 |
+
ARTICLE-NUMBER = {6936},
|
130 |
+
URL = {https://www.mdpi.com/2076-3417/10/19/6936},
|
131 |
+
ISSN = {2076-3417},
|
132 |
+
DOI = {10.3390/app10196936}
|
133 |
+
}
|
134 |
```
|
hyperparams.yaml
CHANGED
@@ -5,7 +5,8 @@
|
|
5 |
# Tokens: unigram
|
6 |
# losses: CTC + KLdiv (Label Smoothing loss)
|
7 |
# Training: KsponSpeech 965.2h
|
8 |
-
#
|
|
|
9 |
# ############################################################################
|
10 |
# Seed needs to be set at top of yaml, before objects with parameters are made
|
11 |
|
@@ -40,7 +41,7 @@ max_decode_ratio: 1.0
|
|
40 |
valid_search_interval: 10
|
41 |
valid_beam_size: 10
|
42 |
test_beam_size: 60
|
43 |
-
lm_weight: 0.
|
44 |
ctc_weight_decode: 0.40
|
45 |
|
46 |
############################## models ################################
|
@@ -105,8 +106,8 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
|
|
105 |
ctc_weight: !ref <ctc_weight_decode>
|
106 |
lm_weight: !ref <lm_weight>
|
107 |
lm_modules: !ref <lm_model>
|
108 |
-
temperature: 1.
|
109 |
-
temperature_lm: 1.
|
110 |
using_eos_threshold: False
|
111 |
length_normalization: True
|
112 |
|
|
|
5 |
# Tokens: unigram
|
6 |
# losses: CTC + KLdiv (Label Smoothing loss)
|
7 |
# Training: KsponSpeech 965.2h
|
8 |
+
# Based on the work of: Jianyuan Zhong, Titouan Parcollet 2021
|
9 |
+
# Authors: Dongwon Kim, Dongwoo Kim 2021
|
10 |
# ############################################################################
|
11 |
# Seed needs to be set at top of yaml, before objects with parameters are made
|
12 |
|
|
|
41 |
valid_search_interval: 10
|
42 |
valid_beam_size: 10
|
43 |
test_beam_size: 60
|
44 |
+
lm_weight: 0.20
|
45 |
ctc_weight_decode: 0.40
|
46 |
|
47 |
############################## models ################################
|
|
|
106 |
ctc_weight: !ref <ctc_weight_decode>
|
107 |
lm_weight: !ref <lm_weight>
|
108 |
lm_modules: !ref <lm_model>
|
109 |
+
temperature: 1.25
|
110 |
+
temperature_lm: 1.25
|
111 |
using_eos_threshold: False
|
112 |
length_normalization: True
|
113 |
|