|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Global scalar settings.
# The literal 16000 is repeated as sample_rate in the compute_stft, compute_fbank
# and compute_istft entries of this file; keep them in sync when changing it.
sample_rate: 16000 |
|
use_vq: true |
|
rec_loss_coef: 1 |
|
use_mask_output: true |
|
# NOTE(review): presumably a threshold applied to the mask output - confirm against the consuming code.
mask_th: 0.35 |
|
|
|
device: cpu |
|
|
|
|
|
# The same value (80) is repeated as n_mels in the compute_fbank entry.
n_mels: 80 |
|
|
|
|
|
# The same value (50) is repeated as out_neurons in the classifier entry.
out_n_neurons: 50 |
|
|
|
|
|
# Embedding model entry: tagged !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
# and anchored as &id002. It is referenced below via !ref <embedding_model>.
# NOTE(review): `dim` is presumably a constructor argument nested under this entry,
# but it appears unindented here - confirm against the original file.
embedding_model: &id002 !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2 |
|
dim: 256 |
|
|
|
# Classifier entry: tagged !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
# and anchored as &id003. It is referenced below via !ref <classifier>.
# input_size (256) equals the embedding_model dim value; out_neurons (50) equals out_n_neurons.
# NOTE(review): the three keys below are presumably constructor arguments nested under
# this entry, but they appear unindented here - confirm against the original file.
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier |
|
input_size: 256 |
|
out_neurons: 50 |
|
lin_blocks: 1 |
|
|
|
|
|
# NOTE(review): K is not referenced by any other entry visible in this file;
# presumably consumed by the psi model definition - confirm.
K: 1024 |
|
|
|
|
|
# Spectral-analysis scalars. The n_fft, hop_length and win_length values are repeated
# literally in the compute_stft / compute_fbank / compute_istft entries.
n_fft: 1024 |
|
spec_mag_power: 0.5 |
|
# NOTE(review): hop_length / win_length are fractional, so presumably expressed in
# milliseconds rather than samples - confirm against the STFT class documentation.
hop_length: 11.6099 |
|
win_length: 23.2199 |
|
# STFT entry: tagged !new:speechbrain.processing.features.STFT and anchored as &id005.
# Its values duplicate the standalone n_fft, hop_length, win_length and sample_rate scalars.
# NOTE(review): the four keys below are presumably constructor arguments nested under
# this entry, but they appear unindented here - confirm against the original file.
compute_stft: &id005 !new:speechbrain.processing.features.STFT |
|
n_fft: 1024 |
|
hop_length: 11.6099 |
|
win_length: 23.2199 |
|
sample_rate: 16000 |
|
|
|
# Filterbank entry: tagged !new:speechbrain.processing.features.Filterbank and anchored as &id006.
# Its values duplicate the standalone n_mels, n_fft and sample_rate scalars.
# NOTE(review): the three keys below are presumably constructor arguments nested under
# this entry, but they appear unindented here - confirm against the original file.
compute_fbank: &id006 !new:speechbrain.processing.features.Filterbank |
|
n_mels: 80 |
|
n_fft: 1024 |
|
sample_rate: 16000 |
|
|
|
# Inverse-STFT entry: tagged !new:speechbrain.processing.features.ISTFT and anchored as &id007.
# hop_length, win_length and sample_rate carry the same values as in the compute_stft entry.
# NOTE(review): the three keys below are presumably constructor arguments nested under
# this entry, but they appear unindented here - confirm against the original file.
compute_istft: &id007 !new:speechbrain.processing.features.ISTFT |
|
sample_rate: 16000 |
|
hop_length: 11.6099 |
|
win_length: 23.2199 |
|
|
|
# Label encoder entry: tagged !new:speechbrain.dataio.encoder.CategoricalEncoder, with no arguments listed.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder |
|
# psi_model is an alias to anchor id004.
# NOTE(review): no `&id004` anchor is defined anywhere in the visible part of this file
# (only &id002, &id003, &id005, &id006 and &id007 appear). A YAML loader rejects an
# alias without a preceding anchor - confirm the psi model definition was not lost.
psi_model: *id004 |
|
|
|
|
|
# Mapping of module names to the objects defined above, each pulled in with !ref.
# Note that the psi model is exposed under the key `psi`, not `psi_model`.
# NOTE(review): the keys below are presumably children of `modules`, but they appear
# unindented here - confirm against the original file.
modules: |
|
compute_stft: !ref <compute_stft> |
|
compute_fbank: !ref <compute_fbank> |
|
compute_istft: !ref <compute_istft> |
|
psi: !ref <psi_model> |
|
embedding_model: !ref <embedding_model> |
|
classifier: !ref <classifier> |
|
|
|
# Pretrainer entry: tagged !new:speechbrain.utils.parameter_transfer.Pretrainer.
# `loadables` lists the objects to load into (by !ref); `paths` gives a source for each
# loadable under the same key name.
# NOTE(review): `loadables` and `paths` are presumably arguments nested under `pretrainer`,
# and the keys after each are presumably their children, but everything appears
# unindented here - confirm against the original file.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer |
|
loadables: |
|
embedding_model: !ref <embedding_model> |
|
classifier: !ref <classifier> |
|
psi: !ref <psi_model> |
|
label_encoder: !ref <label_encoder> |
|
paths: |
|
# NOTE(review): the filename is `embedding_modelft.ckpt`, not `embedding_model.ckpt` -
# confirm this is the intended checkpoint name and not a typo.
embedding_model: speechbrain/PIQ-ESC50/embedding_modelft.ckpt |
|
classifier: speechbrain/PIQ-ESC50/classifier.ckpt |
|
psi: speechbrain/PIQ-ESC50/psi_model.ckpt |
|
# The label encoder path uses the `speechbrain/cnn14-esc50` prefix, unlike the three
# checkpoints above, which use `speechbrain/PIQ-ESC50`.
label_encoder: speechbrain/cnn14-esc50/label_encoder.txt |