# Generated 2023-07-14 from:
# /data2/cloned_repos/speechbrain-clone/recipes/ESC50/interpret/hparams/piq.yaml
# yamllint disable
# #################################
# The recipe for training PIQ on the ESC50 dataset.
#
# Author:
# * Cem Subakan 2022, 2023
# * Francesco Paissan 2022, 2023
# (based on the SpeechBrain UrbanSound8k recipe)
# #################################
# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 1234
__set_seed: !!python/object/apply:torch.manual_seed [1234]
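# The !!python/object/apply tag above makes the YAML loader call
# torch.manual_seed(1234) while the file is being parsed, so every module
# defined further down is created from the same random state.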
# Set up folders for reading from and writing to
# Dataset must already exist at `audio_data_folder`
data_folder: /data2/ESC-50-master
# e.g., /localscratch/ESC-50-master/audio
audio_data_folder: /data2/ESC-50-master/audio
experiment_name: piq
output_folder: ./results/piq/1234
save_folder: ./results/piq/1234/save
train_log: ./results/piq/1234/train_log.txt
test_only: false
save_interpretations: true
interpret_period: 10
# Tensorboard logs
use_tensorboard: false
tensorboard_logs_folder: ./results/piq/1234/tb_logs/
# Path where data manifest files will be stored
train_annotation: /data2/ESC-50-master/manifest/train.json
valid_annotation: /data2/ESC-50-master/manifest/valid.json
test_annotation: /data2/ESC-50-master/manifest/test.json
# To standardize results, ESC-50 comes pre-separated into
# 5 folds for multi-fold cross-validation
train_fold_nums: [1, 2, 3]
valid_fold_nums: [4]
test_fold_nums: [5]
skip_manifest_creation: false
ckpt_interval_minutes: 15 # save checkpoint every N min
# Training parameters
number_of_epochs: 200
batch_size: 16
lr: 0.0002
sample_rate: 16000
use_vq: true
rec_loss_coef: 1
use_mask_output: true
mask_th: 0.35
device: cuda
# Feature parameters
n_mels: 80
# Number of classes
out_n_neurons: 50
shuffle: true
dataloader_options:
  batch_size: 16
  shuffle: true
  num_workers: 0
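# Note: the &idNNN / *idNNN markers below are plain YAML anchors and aliases
# produced when this resolved copy of piq.yaml was dumped; an alias reuses the
# object defined at the matching anchor (e.g. *id001 is the epoch counter below).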
epoch_counter: &id001 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 200
opt_class: !name:torch.optim.Adam
  lr: 0.0002
  weight_decay: 0.000002
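# Unlike !new:, the !name: tag does not instantiate the optimizer here: it
# yields a callable (roughly functools.partial(torch.optim.Adam, lr=0.0002,
# weight_decay=0.000002)) that the training script later applies to the model parameters.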
lr_annealing: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
  factor: 0.5
  patience: 3
  dont_halve_until_epoch: 100
# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: ./results/piq/1234/train_log.txt
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: ./results/piq/1234/save
  recoverables:
    psi_model: &id004 !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio
      dim: 256
      K: 1024
      shared_keys: 0
      activate_class_partitioning: true
      use_adapter: true
      adapter_reduce_dim: true
    counter: *id001
use_pretrained: true
# embedding_model: !new:custom_models.Conv2dEncoder_v2
embedding_model: &id002 !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
  dim: 256
classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  input_size: 256
  out_neurons: 50
  lin_blocks: 1
# Interpretation hyperparams
K: 1024
# pre-processing
n_fft: 1024
spec_mag_power: 0.5
hop_length: 11.6099
win_length: 23.2199
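# hop_length and win_length are given in milliseconds; SpeechBrain's STFT/ISTFT
# convert them to samples using sample_rate (roughly 186 and 372 samples at 16 kHz).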
compute_stft: &id005 !new:speechbrain.processing.features.STFT
  n_fft: 1024
  hop_length: 11.6099
  win_length: 23.2199
  sample_rate: 16000
compute_fbank: &id006 !new:speechbrain.processing.features.Filterbank
  n_mels: 80
  n_fft: 1024
  sample_rate: 16000
compute_istft: &id007 !new:speechbrain.processing.features.ISTFT
  sample_rate: 16000
  hop_length: 11.6099
  win_length: 23.2199
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
psi_model: *id004
modules:
  compute_stft: *id005
  compute_fbank: *id006
  compute_istft: *id007
  psi: *id004
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>
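# Entries written as !ref <key> point back to the top-level keys of this file;
# hyperpyyaml resolves them to the same objects when the file is loaded.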
embedding_model_path: fpaissan/conv2d_us8k/embedding_modelft.ckpt
classifier_model_path: fpaissan/conv2d_us8k/classifier.ckpt
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    embedding_model: !ref <embedding_model>
    classifier: !ref <classifier>
    psi: !ref <psi_model>
    label_encoder: !ref <label_encoder>
  paths:
    embedding_model: fpaissan/conv2d_us8k/embedding_modelft.ckpt
    classifier: fpaissan/conv2d_us8k/classifier.ckpt
    psi: /data2/PIQ-ESC50/psi_model.ckpt
    label_encoder: speechbrain/cnn14-esc50/label_encoder.txt
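# The pretrainer fetches every file listed under paths: repo-style entries such
# as fpaissan/conv2d_us8k/... and speechbrain/cnn14-esc50/... are typically
# resolved from the HuggingFace Hub, while absolute paths are read from local disk.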