mtl-mimic-voicebank / hyperparams.yaml
pplantinga's picture
Revert "Update model to latest version"
2a2af5c
raw
history blame
2.48 kB
# STFT arguments
# These four values are passed, via !ref, to both the STFT and ISTFT objects
# defined later in this file so that analysis and synthesis stay in sync.
# Sampling rate handed to STFT/ISTFT.
sample_rate: 16000
# FFT size; the model's output_size (n_fft // 2 + 1) and d_model (n_fft // 2)
# are derived from it.
n_fft: 512
# NOTE(review): presumably expressed in milliseconds (SpeechBrain STFT
# convention), i.e. 32 ms = 512 samples at 16 kHz — confirm against the API docs.
win_length: 32
# NOTE(review): presumably milliseconds as well (16 ms = 256 samples) — confirm.
hop_length: 16
# Enhancement model args
# Width of the first embedding convolution; later convolutions use
# emb_channels // 2 and emb_channels // 4.
emb_channels: 1024
# Kernel size for the embedding convolutions (conv1..conv4 all use 3).
emb_kernel_size: 3
# NOTE(review): presumably the padding mode intended for the embedding
# convolutions — confirm it matches what the Conv1d layers actually use.
emb_padding: same
# Passed to enhance_model as d_ffn.
enhancer_size: 512
# Passed to enhance_model as num_layers.
enhancer_layers: 8
# Passed to enhance_model as nhead.
enhancer_heads: 8
# Passed to enhance_model as causal.
enhancer_causal: False
# Passed to enhance_model as dropout.
enhancer_drop_rate: 0.1
# STFT front-end object (instantiated once by !new).
# The indented keys are its constructor arguments; they must be nested under
# `compute_stft`, otherwise they become duplicate, self-referential top-level
# keys (e.g. `sample_rate: !ref <sample_rate>`).
# The same object is handed to `resynth` as its `stft` argument.
compute_stft: !new:speechbrain.processing.features.STFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>
# Inverse-STFT object (instantiated once by !new).
# It takes the same four parameters as `compute_stft`. The arguments are nested
# under `compute_istft` so they are passed to the constructor rather than
# redefining the top-level hyperparameters.
# The same object is handed to `resynth` as its `istft` argument.
compute_istft: !new:speechbrain.processing.features.ISTFT
    sample_rate: !ref <sample_rate>
    n_fft: !ref <n_fft>
    win_length: !ref <win_length>
    hop_length: !ref <hop_length>
# Reference to the spectral_magnitude function with `power` pre-bound to 0.5
# (!name binds the nested keyword arguments without calling the function).
# `power` must be nested here; at top level it would be an unrelated key and
# the function would be bound with no arguments.
spectral_magnitude: !name:speechbrain.processing.features.spectral_magnitude
    power: 0.5
# Reference to the resynthesize function with the STFT and ISTFT objects
# pre-bound (!name binds the nested keyword arguments without calling it).
# Both arguments point at the single instances created by `compute_stft` and
# `compute_istft`, so they must be nested under `resynth`.
resynth: !name:speechbrain.processing.signal_processing.resynthesize
    stft: !ref <compute_stft>
    istft: !ref <compute_istft>
# Enhancement network: a CNNTransformerSE model whose input embedding is a
# custom four-stage Conv1d / LayerNorm / LeakyReLU stack.
#
# Nesting matters at two levels:
#   * keys indented once are CNNTransformerSE constructor arguments;
#   * keys under `custom_emb_module` are the named layers of the Sequential
#     container, and keys under each `convN` are that layer's arguments.
#
# With the values declared in the "STFT arguments" and "Enhancement model
# args" sections, the last convolution emits emb_channels // 4 = 256 channels,
# which equals d_model = n_fft // 2 = 256.
enhance_model: !new:speechbrain.lobes.models.transformer.TransformerSE.CNNTransformerSE
    # One output value per frequency bin of the n_fft-point transform.
    output_size: !ref <n_fft> // 2 + 1
    d_model: !ref <n_fft> // 2
    # Passed as classes (!name), not instances.
    output_activation: !name:torch.nn.ReLU
    activation: !name:torch.nn.LeakyReLU
    dropout: !ref <enhancer_drop_rate>
    num_layers: !ref <enhancer_layers>
    d_ffn: !ref <enhancer_size>
    nhead: !ref <enhancer_heads>
    causal: !ref <enhancer_causal>
    custom_emb_module: !new:speechbrain.nnet.containers.Sequential
        # Last dimension is the number of frequency bins; the two leading
        # dimensions are left unspecified (null).
        input_shape: [null, null, !ref <n_fft> // 2 + 1]
        # Kernel size and padding come from the shared emb_* hyperparameters
        # instead of being repeated as literals in every layer. They resolve
        # to the previous values (kernel 3; "same" padding).
        # NOTE(review): "same" is understood to be the Conv1d default padding —
        # confirm against the SpeechBrain API docs.
        conv1: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels>
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm1: !name:speechbrain.nnet.normalization.LayerNorm
        act1: !new:torch.nn.LeakyReLU
        conv2: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 2
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm2: !name:speechbrain.nnet.normalization.LayerNorm
        act2: !new:torch.nn.LeakyReLU
        conv3: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm3: !name:speechbrain.nnet.normalization.LayerNorm
        act3: !new:torch.nn.LeakyReLU
        # Same width as conv3; this is the width fed to the transformer.
        conv4: !name:speechbrain.nnet.CNN.Conv1d
            out_channels: !ref <emb_channels> // 4
            kernel_size: !ref <emb_kernel_size>
            padding: !ref <emb_padding>
        norm4: !name:speechbrain.nnet.normalization.LayerNorm
        act4: !new:torch.nn.LeakyReLU
# Name -> module mapping. The single entry points at the `enhance_model`
# object defined in this file and must be nested under `modules`; at top level
# it would collide with the `enhance_model` definition itself.
modules:
    enhance_model: !ref <enhance_model>
# Pretrainer object whose `loadables` mapping contains the `enhance_model`
# object defined in this file.
# NOTE(review): presumably used to load pretrained parameters into that
# model — confirm against the Pretrainer documentation.
# `loadables` is a constructor argument and `enhance_model` is an entry inside
# it, hence the two levels of nesting.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        enhance_model: !ref <enhance_model>