sdelangen committed
Commit 939aaa5 · verified
1 Parent(s): c0898c9

Create hyperparams.yaml

Files changed (1)
  1. hyperparams.yaml +0 -49
hyperparams.yaml CHANGED
@@ -10,31 +10,6 @@
 
 save_folder: !ref librispeech-streaming-conformer-transducer
 
-# Training parameters
-# To make Transformers converge, the global batch size should be large enough.
-# The global batch size is computed as batch_size * n_gpus * grad_accumulation_factor.
-# Empirically, we found that this value should be >= 128.
-# Please set your parameters accordingly.
-number_of_epochs: 50
-warmup_steps: 25000
-num_workers: 4
-batch_size_valid: 4
-lr: 0.0008
-weight_decay: 0.01
-number_of_ctc_epochs: 40
-ctc_weight: 0.3 # Multitask with CTC for the encoder (0.0 = disabled)
-ce_weight: 0.0 # Multitask with CE for the decoder (0.0 = disabled)
-max_grad_norm: 5.0
-loss_reduction: 'batchmean'
-precision: fp32 # bf16, fp16 or fp32
-
-# The batch size is used if and only if dynamic batching is set to False.
-# Validation and testing are done with fixed batches, not dynamic batching.
-batch_size: 8
-grad_accumulation_factor: 4
-sorting: ascending
-avg_checkpoints: 10 # Number of checkpoints to average for evaluation
-
 # Feature parameters
 sample_rate: 16000
 n_fft: 512
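
The removed comments define the effective global batch size as batch_size * n_gpus * grad_accumulation_factor, with >= 128 recommended. A minimal sketch of that arithmetic, assuming a hypothetical 4-GPU DDP run (the GPU count is not stored in this file):

# Worked example of the global-batch-size rule from the removed comments.
batch_size = 8                # removed key: batch_size
grad_accumulation_factor = 4  # removed key: grad_accumulation_factor
n_gpus = 4                    # assumed DDP world size, for illustration only

global_batch_size = batch_size * n_gpus * grad_accumulation_factor
print(global_batch_size)      # 128, which meets the ">= 128" guideline
assert global_batch_size >= 128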
@@ -44,22 +19,6 @@ win_length: 32
 # Streaming
 streaming: True # controls all Dynamic Chunk Training & chunk size & left context mechanisms
 
-# This setup works well for a 3090 24GB GPU; adapt it to your needs.
-# Adjust grad_accumulation_factor depending on the DDP node count (here 3),
-# or turn dynamic batching off (but training speed will decrease).
-dynamic_batching: True
-max_batch_len: 250
-max_batch_len_val: 50 # reduced because the validation beam is much wider (VRAM)
-num_bucket: 200
-
-dynamic_batch_sampler:
-    max_batch_len: !ref <max_batch_len>
-    max_batch_len_val: !ref <max_batch_len_val>
-    num_buckets: !ref <num_bucket>
-    shuffle_ex: True # if True, re-create batches at each epoch by shuffling examples
-    batch_ordering: random
-    max_batch_ex: 256
-
 # Model parameters
 # Transformer
 d_model: 512
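
The removed keys configure duration-based dynamic batching: max_batch_len caps the summed audio duration per training batch, max_batch_len_val does the same for validation, num_bucket groups utterances of similar length, and max_batch_ex caps the number of examples per batch. A rough, library-free sketch of what those caps mean (an illustration only, not SpeechBrain's DynamicBatchSampler):

# Illustration only: pack utterances into batches so the summed duration stays
# under max_batch_len and the example count stays under max_batch_ex.
# Sorting by duration is a crude stand-in for the length bucketing that
# num_bucket controls in the real sampler.
from typing import List

def pack_batches(
    durations: List[float],        # utterance lengths in seconds
    max_batch_len: float = 250.0,  # removed key: max_batch_len
    max_batch_ex: int = 256,       # removed key: max_batch_ex
) -> List[List[int]]:
    batches, current, current_len = [], [], 0.0
    for idx, dur in sorted(enumerate(durations), key=lambda pair: pair[1]):
        too_long = current_len + dur > max_batch_len
        too_many = len(current) >= max_batch_ex
        if current and (too_long or too_many):
            batches.append(current)
            current, current_len = [], 0.0
        current.append(idx)
        current_len += dur
    if current:
        batches.append(current)
    return batches

# Short utterances end up grouped together; long ones form smaller batches.
print(pack_batches([3.2, 14.7, 5.1, 9.9, 2.4], max_batch_len=20.0))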
@@ -88,9 +47,6 @@ state_beam: 2.3
 expand_beam: 2.3
 lm_weight: 0.50
 
-epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
-    limit: !ref <number_of_epochs>
-
 normalize: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 4
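
The removed epoch_counter only drives the training loop and checkpoint resuming; inference never touches it. A rough sketch of the usual SpeechBrain recipe pattern (the loop body here is hypothetical):

# Sketch of how the removed epoch_counter is consumed during training.
from speechbrain.utils.epoch_loop import EpochCounter

epoch_counter = EpochCounter(limit=50)  # the removed `limit: !ref <number_of_epochs>`

for epoch in epoch_counter:  # yields 1..50 and records progress for resuming
    pass  # hypothetical: run one training epoch, then validation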
@@ -146,11 +102,6 @@ proj_dec: !new:speechbrain.nnet.linear.Linear
     n_neurons: !ref <joint_dim>
     bias: False
 
-# Uncomment for MTL with CTC
-ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
-    blank_index: !ref <blank_index>
-    reduction: !ref <loss_reduction>
-
 emb: !new:speechbrain.nnet.embedding.Embedding
     num_embeddings: !ref <output_neurons>
     consider_as_one_hot: True
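
All of the removed entries (training schedule, dynamic batching, epoch_counter, ctc_cost) are consumed only by the training recipe, so a Hub inference checkpoint can drop them and keep just the keys that instantiate the model. A minimal sketch, assuming the pruned file is loaded with hyperpyyaml as SpeechBrain does (file name and key lookup are illustrative):

# Load the pruned inference hyperparams with hyperpyyaml, which resolves the
# !ref / !new: / !name: tags used throughout this file.
from hyperpyyaml import load_hyperpyyaml

with open("hyperparams.yaml") as f:
    hparams = load_hyperpyyaml(f)

# e.g. the feature normalizer declared above is instantiated directly:
normalize = hparams["normalize"]
print(type(normalize))  # speechbrain.processing.features.InputNormalization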
 