pyf98 committed on
Commit
4240c36
1 Parent(s): 65c319f

Update model

Browse files
Files changed (20) hide show
  1. README.md +326 -0
  2. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/RESULTS.md +29 -0
  3. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/config.yaml +228 -0
  4. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/acc.png +0 -0
  5. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/backward_time.png +0 -0
  6. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer.png +0 -0
  7. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer_ctc.png +0 -0
  8. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/forward_time.png +0 -0
  9. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/gpu_max_cached_mem_GB.png +0 -0
  10. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/iter_time.png +0 -0
  11. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss.png +0 -0
  12. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_att.png +0 -0
  13. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_ctc.png +0 -0
  14. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim0_lr0.png +0 -0
  15. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim_step_time.png +0 -0
  16. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/train_time.png +0 -0
  17. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/wer.png +0 -0
  18. exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/valid.acc.ave_10best.pth +3 -0
  19. exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz +0 -0
  20. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: noinfo
7
+ datasets:
8
+ - speechcommands
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `pyf98/speechcommands_35commands_conformer`
15
+
16
+ This model was trained by Yifan Peng using the speechcommands recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout bf523b70cae8300da004b41ec6a0d1b57c7ae8bb
23
+ pip install -e .
24
+ cd egs2/speechcommands/asr1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model pyf98/speechcommands_35commands_conformer
26
+ ```
27
+
28
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Tue Dec 28 20:39:29 EST 2021`
32
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
33
+ - espnet version: `espnet 0.10.5a1`
34
+ - pytorch version: `pytorch 1.9.0`
35
+ - Git hash: `bf523b70cae8300da004b41ec6a0d1b57c7ae8bb`
36
+ - Commit date: `Tue Dec 28 14:53:22 2021 -0500`
37
+
38
+ ## asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
39
+ ### WER
40
+
41
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
42
+ |---|---|---|---|---|---|---|---|---|
43
+ |infer/dev|9981|9981|97.4|2.6|0.0|0.0|2.6|2.6|
44
+ |infer/test|11005|11005|97.5|2.5|0.0|0.0|2.5|2.5|
45
+
46
+ ### CER
47
+
48
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
49
+ |---|---|---|---|---|---|---|---|---|
50
+ |infer/dev|9981|38699|98.2|1.3|0.5|0.7|2.5|2.6|
51
+ |infer/test|11005|42682|98.3|1.2|0.5|0.6|2.3|2.5|
52
+
53
+ ### TER
54
+
55
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
56
+ |---|---|---|---|---|---|---|---|---|
57
+
58
+ ## ASR config
59
+
60
+ <details><summary>expand</summary>
61
+
62
+ ```
63
+ config: conf/train_asr_conformer_noBatchNorm.yaml
64
+ print_config: false
65
+ log_level: INFO
66
+ dry_run: false
67
+ iterator_type: sequence
68
+ output_dir: exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
69
+ ngpu: 1
70
+ seed: 0
71
+ num_workers: 1
72
+ num_att_plot: 3
73
+ dist_backend: nccl
74
+ dist_init_method: env://
75
+ dist_world_size: null
76
+ dist_rank: null
77
+ local_rank: 0
78
+ dist_master_addr: null
79
+ dist_master_port: null
80
+ dist_launcher: null
81
+ multiprocessing_distributed: false
82
+ unused_parameters: false
83
+ sharded_ddp: false
84
+ cudnn_enabled: true
85
+ cudnn_benchmark: false
86
+ cudnn_deterministic: true
87
+ collect_stats: false
88
+ write_collected_feats: false
89
+ max_epoch: 150
90
+ patience: null
91
+ val_scheduler_criterion:
92
+ - valid
93
+ - loss
94
+ early_stopping_criterion:
95
+ - valid
96
+ - loss
97
+ - min
98
+ best_model_criterion:
99
+ - - valid
100
+ - loss
101
+ - min
102
+ - - valid
103
+ - acc
104
+ - max
105
+ keep_nbest_models: 10
106
+ grad_clip: 5.0
107
+ grad_clip_type: 2.0
108
+ grad_noise: false
109
+ accum_grad: 3
110
+ no_forward_run: false
111
+ resume: true
112
+ train_dtype: float32
113
+ use_amp: false
114
+ log_interval: null
115
+ use_tensorboard: true
116
+ use_wandb: false
117
+ wandb_project: null
118
+ wandb_id: null
119
+ wandb_entity: null
120
+ wandb_name: null
121
+ wandb_model_log_interval: -1
122
+ detect_anomaly: false
123
+ pretrain_path: null
124
+ init_param: []
125
+ ignore_init_mismatch: false
126
+ freeze_param: []
127
+ num_iters_per_epoch: null
128
+ batch_size: 20
129
+ valid_batch_size: null
130
+ batch_bins: 4000000
131
+ valid_batch_bins: null
132
+ train_shape_file:
133
+ - exp/asr_stats_fbank_pitch_word_sp/train/speech_shape
134
+ - exp/asr_stats_fbank_pitch_word_sp/train/text_shape.word
135
+ valid_shape_file:
136
+ - exp/asr_stats_fbank_pitch_word_sp/valid/speech_shape
137
+ - exp/asr_stats_fbank_pitch_word_sp/valid/text_shape.word
138
+ batch_type: numel
139
+ valid_batch_type: null
140
+ fold_length:
141
+ - 800
142
+ - 150
143
+ sort_in_batch: descending
144
+ sort_batch: descending
145
+ multiple_iterator: false
146
+ chunk_length: 500
147
+ chunk_shift_ratio: 0.5
148
+ num_cache_chunks: 1024
149
+ train_data_path_and_name_and_type:
150
+ - - dump/fbank_pitch/train_sp/feats.scp
151
+ - speech
152
+ - kaldi_ark
153
+ - - dump/fbank_pitch/train_sp/text
154
+ - text
155
+ - text
156
+ valid_data_path_and_name_and_type:
157
+ - - dump/fbank_pitch/dev/feats.scp
158
+ - speech
159
+ - kaldi_ark
160
+ - - dump/fbank_pitch/dev/text
161
+ - text
162
+ - text
163
+ allow_variable_data_keys: false
164
+ max_cache_size: 0.0
165
+ max_cache_fd: 32
166
+ valid_max_cache_size: null
167
+ optim: adam
168
+ optim_conf:
169
+ lr: 0.0002
170
+ scheduler: warmuplr
171
+ scheduler_conf:
172
+ warmup_steps: 5000
173
+ token_list:
174
+ - <blank>
175
+ - <unk>
176
+ - zero
177
+ - five
178
+ - 'yes'
179
+ - seven
180
+ - nine
181
+ - one
182
+ - down
183
+ - 'no'
184
+ - stop
185
+ - two
186
+ - go
187
+ - six
188
+ - 'on'
189
+ - left
190
+ - eight
191
+ - right
192
+ - 'off'
193
+ - three
194
+ - four
195
+ - up
196
+ - house
197
+ - wow
198
+ - dog
199
+ - marvin
200
+ - bird
201
+ - cat
202
+ - happy
203
+ - sheila
204
+ - bed
205
+ - tree
206
+ - backward
207
+ - visual
208
+ - learn
209
+ - follow
210
+ - forward
211
+ - <sos/eos>
212
+ init: null
213
+ input_size: 83
214
+ ctc_conf:
215
+ dropout_rate: 0.0
216
+ ctc_type: builtin
217
+ reduce: true
218
+ ignore_nan_grad: true
219
+ model_conf:
220
+ ctc_weight: 0.0
221
+ lsm_weight: 0.1
222
+ length_normalized_loss: false
223
+ use_preprocessor: true
224
+ token_type: word
225
+ bpemodel: null
226
+ non_linguistic_symbols: null
227
+ cleaner: null
228
+ g2p: null
229
+ speech_volume_normalize: null
230
+ rir_scp: null
231
+ rir_apply_prob: 1.0
232
+ noise_scp: null
233
+ noise_apply_prob: 1.0
234
+ noise_db_range: '13_15'
235
+ frontend: null
236
+ frontend_conf: {}
237
+ specaug: specaug
238
+ specaug_conf:
239
+ apply_time_warp: true
240
+ time_warp_window: 5
241
+ time_warp_mode: bicubic
242
+ apply_freq_mask: true
243
+ freq_mask_width_range:
244
+ - 0
245
+ - 30
246
+ num_freq_mask: 2
247
+ apply_time_mask: true
248
+ time_mask_width_range:
249
+ - 0
250
+ - 40
251
+ num_time_mask: 2
252
+ normalize: global_mvn
253
+ normalize_conf:
254
+ stats_file: exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz
255
+ preencoder: null
256
+ preencoder_conf: {}
257
+ encoder: conformer
258
+ encoder_conf:
259
+ output_size: 256
260
+ attention_heads: 4
261
+ linear_units: 2048
262
+ num_blocks: 12
263
+ dropout_rate: 0.1
264
+ positional_dropout_rate: 0.1
265
+ attention_dropout_rate: 0.1
266
+ input_layer: conv2d
267
+ normalize_before: true
268
+ macaron_style: true
269
+ rel_pos_type: legacy
270
+ pos_enc_layer_type: rel_pos
271
+ selfattention_layer_type: rel_selfattn
272
+ activation_type: swish
273
+ use_cnn_module: true
274
+ cnn_module_kernel: 15
275
+ postencoder: null
276
+ postencoder_conf: {}
277
+ decoder: transformer
278
+ decoder_conf:
279
+ attention_heads: 4
280
+ linear_units: 2048
281
+ num_blocks: 6
282
+ dropout_rate: 0.1
283
+ positional_dropout_rate: 0.1
284
+ self_attention_dropout_rate: 0.1
285
+ src_attention_dropout_rate: 0.1
286
+ required:
287
+ - output_dir
288
+ - token_list
289
+ version: 0.10.3a3
290
+ distributed: false
291
+ ```
292
+
293
+ </details>
294
+
295
+
296
+
297
+ ### Citing ESPnet
298
+
299
+ ```bibtex
300
+ @inproceedings{watanabe2018espnet,
301
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
302
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
303
+ year={2018},
304
+ booktitle={Proceedings of Interspeech},
305
+ pages={2207--2211},
306
+ doi={10.21437/Interspeech.2018-1456},
307
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
308
+ }
309
+
310
+
311
+
312
+
313
+ ```
314
+
315
+ or arXiv:
316
+
317
+ ```bibtex
318
+ @misc{watanabe2018espnet,
319
+ title={ESPnet: End-to-End Speech Processing Toolkit},
320
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
321
+ year={2018},
322
+ eprint={1804.00015},
323
+ archivePrefix={arXiv},
324
+ primaryClass={cs.CL}
325
+ }
326
+ ```
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/RESULTS.md ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_asr_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Tue Dec 28 20:39:29 EST 2021`
5
+ - python version: `3.9.7 (default, Sep 16 2021, 13:09:58) [GCC 7.5.0]`
6
+ - espnet version: `espnet 0.10.5a1`
7
+ - pytorch version: `pytorch 1.9.0`
8
+ - Git hash: `bf523b70cae8300da004b41ec6a0d1b57c7ae8bb`
9
+ - Commit date: `Tue Dec 28 14:53:22 2021 -0500`
10
+
11
+ ## asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
12
+ ### WER
13
+
14
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
15
+ |---|---|---|---|---|---|---|---|---|
16
+ |infer/dev|9981|9981|97.4|2.6|0.0|0.0|2.6|2.6|
17
+ |infer/test|11005|11005|97.5|2.5|0.0|0.0|2.5|2.5|
18
+
19
+ ### CER
20
+
21
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
22
+ |---|---|---|---|---|---|---|---|---|
23
+ |infer/dev|9981|38699|98.2|1.3|0.5|0.7|2.5|2.6|
24
+ |infer/test|11005|42682|98.3|1.2|0.5|0.6|2.3|2.5|
25
+
26
+ ### TER
27
+
28
+ |dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
29
+ |---|---|---|---|---|---|---|---|---|
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/config.yaml ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_asr_conformer_noBatchNorm.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 150
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - valid
41
+ - acc
42
+ - max
43
+ keep_nbest_models: 10
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 3
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: null
53
+ use_tensorboard: true
54
+ use_wandb: false
55
+ wandb_project: null
56
+ wandb_id: null
57
+ wandb_entity: null
58
+ wandb_name: null
59
+ wandb_model_log_interval: -1
60
+ detect_anomaly: false
61
+ pretrain_path: null
62
+ init_param: []
63
+ ignore_init_mismatch: false
64
+ freeze_param: []
65
+ num_iters_per_epoch: null
66
+ batch_size: 20
67
+ valid_batch_size: null
68
+ batch_bins: 4000000
69
+ valid_batch_bins: null
70
+ train_shape_file:
71
+ - exp/asr_stats_fbank_pitch_word_sp/train/speech_shape
72
+ - exp/asr_stats_fbank_pitch_word_sp/train/text_shape.word
73
+ valid_shape_file:
74
+ - exp/asr_stats_fbank_pitch_word_sp/valid/speech_shape
75
+ - exp/asr_stats_fbank_pitch_word_sp/valid/text_shape.word
76
+ batch_type: numel
77
+ valid_batch_type: null
78
+ fold_length:
79
+ - 800
80
+ - 150
81
+ sort_in_batch: descending
82
+ sort_batch: descending
83
+ multiple_iterator: false
84
+ chunk_length: 500
85
+ chunk_shift_ratio: 0.5
86
+ num_cache_chunks: 1024
87
+ train_data_path_and_name_and_type:
88
+ - - dump/fbank_pitch/train_sp/feats.scp
89
+ - speech
90
+ - kaldi_ark
91
+ - - dump/fbank_pitch/train_sp/text
92
+ - text
93
+ - text
94
+ valid_data_path_and_name_and_type:
95
+ - - dump/fbank_pitch/dev/feats.scp
96
+ - speech
97
+ - kaldi_ark
98
+ - - dump/fbank_pitch/dev/text
99
+ - text
100
+ - text
101
+ allow_variable_data_keys: false
102
+ max_cache_size: 0.0
103
+ max_cache_fd: 32
104
+ valid_max_cache_size: null
105
+ optim: adam
106
+ optim_conf:
107
+ lr: 0.0002
108
+ scheduler: warmuplr
109
+ scheduler_conf:
110
+ warmup_steps: 5000
111
+ token_list:
112
+ - <blank>
113
+ - <unk>
114
+ - zero
115
+ - five
116
+ - 'yes'
117
+ - seven
118
+ - nine
119
+ - one
120
+ - down
121
+ - 'no'
122
+ - stop
123
+ - two
124
+ - go
125
+ - six
126
+ - 'on'
127
+ - left
128
+ - eight
129
+ - right
130
+ - 'off'
131
+ - three
132
+ - four
133
+ - up
134
+ - house
135
+ - wow
136
+ - dog
137
+ - marvin
138
+ - bird
139
+ - cat
140
+ - happy
141
+ - sheila
142
+ - bed
143
+ - tree
144
+ - backward
145
+ - visual
146
+ - learn
147
+ - follow
148
+ - forward
149
+ - <sos/eos>
150
+ init: null
151
+ input_size: 83
152
+ ctc_conf:
153
+ dropout_rate: 0.0
154
+ ctc_type: builtin
155
+ reduce: true
156
+ ignore_nan_grad: true
157
+ model_conf:
158
+ ctc_weight: 0.0
159
+ lsm_weight: 0.1
160
+ length_normalized_loss: false
161
+ use_preprocessor: true
162
+ token_type: word
163
+ bpemodel: null
164
+ non_linguistic_symbols: null
165
+ cleaner: null
166
+ g2p: null
167
+ speech_volume_normalize: null
168
+ rir_scp: null
169
+ rir_apply_prob: 1.0
170
+ noise_scp: null
171
+ noise_apply_prob: 1.0
172
+ noise_db_range: '13_15'
173
+ frontend: null
174
+ frontend_conf: {}
175
+ specaug: specaug
176
+ specaug_conf:
177
+ apply_time_warp: true
178
+ time_warp_window: 5
179
+ time_warp_mode: bicubic
180
+ apply_freq_mask: true
181
+ freq_mask_width_range:
182
+ - 0
183
+ - 30
184
+ num_freq_mask: 2
185
+ apply_time_mask: true
186
+ time_mask_width_range:
187
+ - 0
188
+ - 40
189
+ num_time_mask: 2
190
+ normalize: global_mvn
191
+ normalize_conf:
192
+ stats_file: exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz
193
+ preencoder: null
194
+ preencoder_conf: {}
195
+ encoder: conformer
196
+ encoder_conf:
197
+ output_size: 256
198
+ attention_heads: 4
199
+ linear_units: 2048
200
+ num_blocks: 12
201
+ dropout_rate: 0.1
202
+ positional_dropout_rate: 0.1
203
+ attention_dropout_rate: 0.1
204
+ input_layer: conv2d
205
+ normalize_before: true
206
+ macaron_style: true
207
+ rel_pos_type: legacy
208
+ pos_enc_layer_type: rel_pos
209
+ selfattention_layer_type: rel_selfattn
210
+ activation_type: swish
211
+ use_cnn_module: true
212
+ cnn_module_kernel: 15
213
+ postencoder: null
214
+ postencoder_conf: {}
215
+ decoder: transformer
216
+ decoder_conf:
217
+ attention_heads: 4
218
+ linear_units: 2048
219
+ num_blocks: 6
220
+ dropout_rate: 0.1
221
+ positional_dropout_rate: 0.1
222
+ self_attention_dropout_rate: 0.1
223
+ src_attention_dropout_rate: 0.1
224
+ required:
225
+ - output_dir
226
+ - token_list
227
+ version: 0.10.3a3
228
+ distributed: false
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/acc.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/backward_time.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/cer_ctc.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/forward_time.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/gpu_max_cached_mem_GB.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/iter_time.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_att.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/loss_ctc.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim0_lr0.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/optim_step_time.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/train_time.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/images/wer.png ADDED
exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bf36bcef67cbbd54f244f35fb21e64aaf11c610fb0f0225f3aad51c5f7d9108
3
+ size 172284643
exp/asr_stats_fbank_pitch_word_sp/train/feats_stats.npz ADDED
Binary file (1.43 kB). View file
 
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: 0.10.5a1
2
+ files:
3
+ asr_model_file: exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/valid.acc.ave_10best.pth
4
+ python: "3.9.7 (default, Sep 16 2021, 13:09:58) \n[GCC 7.5.0]"
5
+ timestamp: 1640741969.792623
6
+ torch: 1.9.0
7
+ yaml_files:
8
+ asr_train_config: exp/asr_35commands_conformer_noBatchNorm_warmup5k_lr2e-4_accum3_conv15_5speeds/config.yaml