update with spn_predictor

#1
Files changed (1) hide show
  1. hyperparams.yaml +23 -4
hyperparams.yaml CHANGED
@@ -2,6 +2,7 @@
2
  # Model: Fastspeech2 for TTS
3
  # Authors: Sathvik Udupa, Yingzhi Wang, Pradnya Kandarkar
4
  # ################################
 
5
  # Input parameters
6
  lexicon:
7
  - AA
@@ -45,7 +46,7 @@ lexicon:
45
  - ZH
46
  - spn
47
 
48
- n_symbols: 41 #fixed deppending on symbols in the lexicon +1 for a dummy symbol used for padding
49
  padding_idx: 0
50
  n_mel_channels: 80
51
 
@@ -73,18 +74,34 @@ postnet_kernel_size: 5
73
  postnet_n_convolutions: 5
74
  postnet_dropout: 0.5
75
 
76
- # Common
77
  normalize_before: True
78
  ffn_type: 1dcnn #1dcnn or ffn
79
  ffn_cnn_kernel_size_list: [9, 1]
80
 
81
- # Variance predictor
82
  dur_pred_kernel_size: 3
83
  pitch_pred_kernel_size: 3
84
  energy_pred_kernel_size: 3
85
  variance_predictor_dropout: 0.5
86
 
87
- # Model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  model: !new:speechbrain.lobes.models.FastSpeech2.FastSpeech2
89
  enc_num_layers: !ref <enc_num_layers>
90
  enc_num_head: !ref <enc_num_head>
@@ -119,8 +136,10 @@ model: !new:speechbrain.lobes.models.FastSpeech2.FastSpeech2
119
  input_encoder: !new:speechbrain.dataio.encoder.TextEncoder
120
 
121
  modules:
 
122
  model: !ref <model>
123
 
124
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
125
  loadables:
 
126
  model: !ref <model>
 
2
  # Model: Fastspeech2 for TTS
3
  # Authors: Sathvik Udupa, Yingzhi Wang, Pradnya Kandarkar
4
  # ################################
5
+
6
  # Input parameters
7
  lexicon:
8
  - AA
 
46
  - ZH
47
  - spn
48
 
49
+ n_symbols: 41 # fixed: number of symbols in the lexicon, +1 for a dummy symbol used for padding
50
  padding_idx: 0
51
  n_mel_channels: 80
52
 
 
74
  postnet_n_convolutions: 5
75
  postnet_dropout: 0.5
76
 
77
+ # common
78
  normalize_before: True
79
  ffn_type: 1dcnn #1dcnn or ffn
80
  ffn_cnn_kernel_size_list: [9, 1]
81
 
82
+ # variance predictor
83
  dur_pred_kernel_size: 3
84
  pitch_pred_kernel_size: 3
85
  energy_pred_kernel_size: 3
86
  variance_predictor_dropout: 0.5
87
 
88
+ # SPN predictor
89
+ spn_predictor: !new:speechbrain.lobes.models.FastSpeech2.SPNPredictor
90
+ enc_num_layers: !ref <enc_num_layers>
91
+ enc_num_head: !ref <enc_num_head>
92
+ enc_d_model: !ref <enc_d_model>
93
+ enc_ffn_dim: !ref <enc_ffn_dim>
94
+ enc_k_dim: !ref <enc_k_dim>
95
+ enc_v_dim: !ref <enc_v_dim>
96
+ enc_dropout: !ref <enc_dropout>
97
+ normalize_before: !ref <normalize_before>
98
+ ffn_type: !ref <ffn_type>
99
+ ffn_cnn_kernel_size_list: !ref <ffn_cnn_kernel_size_list>
100
+ n_char: !ref <n_symbols>
101
+ padding_idx: !ref <padding_idx>
102
+
103
+
104
+ # model
105
  model: !new:speechbrain.lobes.models.FastSpeech2.FastSpeech2
106
  enc_num_layers: !ref <enc_num_layers>
107
  enc_num_head: !ref <enc_num_head>
 
136
  input_encoder: !new:speechbrain.dataio.encoder.TextEncoder
137
 
138
  modules:
139
+ spn_predictor: !ref <spn_predictor>
140
  model: !ref <model>
141
 
142
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
143
  loadables:
144
+ spn_predictor: !ref <spn_predictor>
145
  model: !ref <model>