# Generator hyperparameters. Each value is declared once here and pulled
# into the `generator` object further down through `!ref <name>`.

# Embedding table settings (forwarded as num_embeddings / embedding_dim).
num_embeddings: 8193
embedding_dim: 1024

# Channel counts forwarded as in_channels / out_channels.
in_channels: 128
out_channels: 1

# Residual-block settings: one dilation triple per kernel size (3 entries).
# resblock_type is deliberately a quoted string, not an integer.
resblock_type: "1"
resblock_dilation_sizes: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]
resblock_kernel_sizes: [3, 7, 11]

# Upsampling settings: one kernel size per factor (5 entries each).
# The factors multiply to 5 * 4 * 4 * 2 * 2 = 320.
upsample_kernel_sizes: [11, 8, 8, 4, 4]
upsample_initial_channel: 512
upsample_factors: [5, 4, 4, 2, 2]

# Remaining scalar options forwarded unchanged to the generator.
inference_padding: 5
cond_channels: 0
conv_post_bias: true

# Instantiates the vocoder generator. The keys nested below are the keyword
# arguments handed to the constructor; they must stay indented under the
# `!new:` tag, otherwise they collide with the top-level hyperparameters of
# the same name.
generator: !new:speechbrain.lobes.models.HifiGAN.UnitHifiganGenerator
  # Values shared with the top-level hyperparameter section.
  in_channels: !ref <in_channels>
  out_channels: !ref <out_channels>
  resblock_type: !ref <resblock_type>
  resblock_dilation_sizes: !ref <resblock_dilation_sizes>
  resblock_kernel_sizes: !ref <resblock_kernel_sizes>
  upsample_kernel_sizes: !ref <upsample_kernel_sizes>
  upsample_initial_channel: !ref <upsample_initial_channel>
  upsample_factors: !ref <upsample_factors>
  inference_padding: !ref <inference_padding>
  cond_channels: !ref <cond_channels>
  conv_post_bias: !ref <conv_post_bias>
  num_embeddings: !ref <num_embeddings>
  embedding_dim: !ref <embedding_dim>
  # Options fixed inline for this configuration.
  # NOTE(review): exact semantics are defined by UnitHifiganGenerator;
  # confirm against the installed speechbrain version.
  duration_predictor: false
  multi_speaker: false
  skip_token_embedding: true
  pooling_type: "sum"

# Named objects grouped under `modules`; only the generator is registered.
modules:
  generator: !ref <generator>

# Builds a speechbrain Pretrainer whose `loadables` mapping registers the
# generator object defined in this file.
# NOTE(review): presumably used to load pretrained generator weights —
# confirm against the code that consumes this file.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    generator: !ref <generator>