File size: 2,197 Bytes
c8c5132 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d e87343a 65de50d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
# ################################
# Model: Tacotron2 for TTS
# Authors: Artem Ploujnikov, Yingzhi Wang
# ################################
# Hyperparameters. Apart from sample_rate, each value below is passed to the
# model constructor through a same-named `!ref`.

# General
mask_padding: true
n_mel_channels: 80

# Symbols
n_symbols: 148
symbols_embedding_dim: 512

# Encoder
encoder_kernel_size: 5
encoder_n_convolutions: 3
encoder_embedding_dim: 512

# Attention
attention_rnn_dim: 1024
attention_dim: 128

# Attention location
attention_location_n_filters: 32
attention_location_kernel_size: 31

# Decoder
n_frames_per_step: 1
decoder_rnn_dim: 1024
prenet_dim: 256
max_decoder_steps: 1000
gate_threshold: 0.5
p_attention_dropout: 0.1
p_decoder_dropout: 0.1

# Postnet
postnet_embedding_dim: 512
postnet_kernel_size: 5
postnet_n_convolutions: 5

decoder_no_early_stopping: false

# Not referenced by any other key visible in this file; presumably read
# directly by the code that loads these hyperparameters -- TODO confirm.
sample_rate: 22050
# Model
# Instantiates speechbrain.lobes.models.Tacotron2.Tacotron2 via `!new:`.
# Every keyword argument is a `!ref` to the same-named top-level
# hyperparameter. The arguments must stay indented under `model:`; at
# column 0 they would become duplicate top-level keys instead of
# constructor arguments.
model: !new:speechbrain.lobes.models.Tacotron2.Tacotron2
  mask_padding: !ref <mask_padding>
  n_mel_channels: !ref <n_mel_channels>
  # symbols
  n_symbols: !ref <n_symbols>
  symbols_embedding_dim: !ref <symbols_embedding_dim>
  # encoder
  encoder_kernel_size: !ref <encoder_kernel_size>
  encoder_n_convolutions: !ref <encoder_n_convolutions>
  encoder_embedding_dim: !ref <encoder_embedding_dim>
  # attention
  attention_rnn_dim: !ref <attention_rnn_dim>
  attention_dim: !ref <attention_dim>
  # attention location
  attention_location_n_filters: !ref <attention_location_n_filters>
  attention_location_kernel_size: !ref <attention_location_kernel_size>
  # decoder
  n_frames_per_step: !ref <n_frames_per_step>
  decoder_rnn_dim: !ref <decoder_rnn_dim>
  prenet_dim: !ref <prenet_dim>
  max_decoder_steps: !ref <max_decoder_steps>
  gate_threshold: !ref <gate_threshold>
  p_attention_dropout: !ref <p_attention_dropout>
  p_decoder_dropout: !ref <p_decoder_dropout>
  # postnet
  postnet_embedding_dim: !ref <postnet_embedding_dim>
  postnet_kernel_size: !ref <postnet_kernel_size>
  postnet_n_convolutions: !ref <postnet_n_convolutions>
  decoder_no_early_stopping: !ref <decoder_no_early_stopping>
# Function that converts the text into a sequence of valid characters.
# Declared with `!name:` (a reference to the callable) rather than `!new:`
# (which this file uses to construct objects such as the model).
text_to_sequence: !name:speechbrain.utils.text_to_sequence.text_to_sequence
# Mapping of module names to objects defined in this file. The entry must
# be nested under `modules:`; at column 0 it would redefine the top-level
# `model` key and leave `modules` empty.
modules:
  model: !ref <model>
# Instantiates speechbrain.utils.parameter_transfer.Pretrainer via `!new:`.
# `loadables` maps a name to the object it refers to; here the only entry
# is the model defined in this file.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    model: !ref <model>