Commit 
							
							·
						
						ca690f1
	
1
								Parent(s):
							
							5ad3c07
								
Simplify hyperparams.yaml and add hyperparams_training.yaml
Browse files- hyperparams.yaml +2 -161
- hyperparams_training.yaml +234 -0
    	
        hyperparams.yaml
    CHANGED
    
    | @@ -1,128 +1,17 @@ | |
| 1 | 
            -
            # Generated 2021-09-17 from:
         | 
| 2 | 
            -
            # /home/mila/s/subakany/speechbrain_new/recipes/WSJ0Mix/separation/snrestimator_yamls/timedom_convnet_whamr_v2_stnorm_manyseparators.yaml
         | 
| 3 | 
            -
            # yamllint disable
         | 
| 4 | 
             
            # ################################
         | 
| 5 | 
            -
            # Model:  | 
| 6 | 
            -
            #  | 
| 7 | 
            -
            # Dataset : WSJ0-2mix and WSJ0-3mix
         | 
| 8 | 
             
            # ################################
         | 
| 9 | 
            -
            #
         | 
| 10 | 
            -
            # Basic parameters
         | 
| 11 | 
            -
            # Seed needs to be set at top of yaml, before objects with parameters are made
         | 
| 12 | 
            -
            #
         | 
| 13 | 
            -
            seed: 1234
         | 
| 14 | 
            -
            __set_seed: !apply:torch.manual_seed [1234]
         | 
| 15 |  | 
| 16 | 
            -
            # Data params
         | 
| 17 | 
            -
             | 
| 18 | 
            -
            # e.g. '/yourpath/wsj0-mix/2speakers'
         | 
| 19 | 
            -
            # end with 2speakers for wsj0-2mix or 3speakers for wsj0-3mix
         | 
| 20 | 
            -
            data_folder: /miniscratch/subakany/LibriMixData_new/Libri2Mix/
         | 
| 21 | 
            -
             | 
| 22 | 
            -
            # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
         | 
| 23 | 
            -
            # e.g. /yourpath/wsj0-processed/si_tr_s/
         | 
| 24 | 
            -
            # you need to convert the original wsj0 to 8k
         | 
| 25 | 
            -
            # you can do this conversion with the script ../meta/preprocess_dynamic_mixing.py
         | 
| 26 | 
            -
            base_folder_dm: /miniscratch/subakany/LibriMixData_new/LibriSpeech/train-clean-360_processed/
         | 
| 27 | 
            -
            rir_path: /miniscratch/subakany/whamr_rirs_wav
         | 
| 28 | 
            -
             | 
| 29 | 
            -
            experiment_name: snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators
         | 
| 30 | 
            -
            output_folder: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234
         | 
| 31 | 
            -
            train_log: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/train_log.txt
         | 
| 32 | 
            -
            save_folder: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save
         | 
| 33 | 
            -
            train_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/libri2mix_train-360.csv
         | 
| 34 | 
            -
            valid_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/libri2mix_dev.csv
         | 
| 35 | 
            -
            test_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/libri2mix_test.csv
         | 
| 36 | 
            -
             | 
| 37 | 
            -
            wsj_data_folder: /network/tmp1/subakany/wham_original
         | 
| 38 | 
            -
            train_wsj_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/wham_tr.csv
         | 
| 39 | 
            -
            test_wsj_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/wham_tt.csv
         | 
| 40 | 
            -
            base_folder_dm_whamr: /network/tmp1/subakany/wsj0-processed/si_tr_s
         | 
| 41 | 
            -
            use_whamr_train: true
         | 
| 42 | 
            -
            whamr_proportion: 0.6
         | 
| 43 | 
            -
             | 
| 44 | 
            -
            test_onwsj: false
         | 
| 45 | 
            -
             | 
| 46 | 
            -
            skip_prep: false
         | 
| 47 | 
            -
             | 
| 48 | 
            -
            ckpt_interval_minutes: 60
         | 
| 49 | 
            -
             | 
| 50 | 
            -
            # Experiment params
         | 
| 51 | 
            -
            auto_mix_prec: false # Set it to True for mixed precision
         | 
| 52 | 
            -
            test_only: false
         | 
| 53 | 
            -
            num_spks: 2 # set to 3 for wsj0-3mix
         | 
| 54 | 
            -
            progressbar: true
         | 
| 55 | 
            -
            save_audio: false # Save estimated sources on disk
         | 
| 56 | 
             
            sample_rate: 8000
         | 
| 57 |  | 
| 58 | 
            -
            # Training parameters
         | 
| 59 | 
            -
            N_epochs: 200
         | 
| 60 | 
            -
            batch_size: 1
         | 
| 61 | 
            -
            lr: 0.0001
         | 
| 62 | 
            -
            clip_grad_norm: 5
         | 
| 63 | 
            -
            loss_upper_lim: 999999  # this is the upper limit for an acceptable loss
         | 
| 64 | 
            -
            # if True, the training sequences are cut to a specified length
         | 
| 65 | 
            -
            limit_training_signal_len: false
         | 
| 66 | 
            -
            # this is the length of sequences if we choose to limit
         | 
| 67 | 
            -
            # the signal length of training sequences
         | 
| 68 | 
            -
            training_signal_len: 32000000
         | 
| 69 | 
            -
             | 
| 70 | 
            -
            # Set it to True to dynamically create mixtures at training time
         | 
| 71 | 
            -
            dynamic_mixing: true
         | 
| 72 | 
            -
            use_wham_noise: true
         | 
| 73 | 
            -
            use_reverb_augment: true
         | 
| 74 | 
            -
             | 
| 75 | 
            -
            # Parameters for data augmentation
         | 
| 76 | 
            -
            use_wavedrop: false
         | 
| 77 | 
            -
            use_speedperturb: true
         | 
| 78 | 
            -
            use_speedperturb_sameforeachsource: false
         | 
| 79 | 
            -
            use_rand_shift: false
         | 
| 80 | 
            -
            min_shift: -8000
         | 
| 81 | 
            -
            max_shift: 8000
         | 
| 82 | 
            -
             | 
| 83 | 
            -
            speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
         | 
| 84 | 
            -
              perturb_prob: 1.0
         | 
| 85 | 
            -
              drop_freq_prob: 0.0
         | 
| 86 | 
            -
              drop_chunk_prob: 0.0
         | 
| 87 | 
            -
              sample_rate: 8000
         | 
| 88 | 
            -
              speeds: [95, 100, 105]
         | 
| 89 | 
            -
             | 
| 90 | 
            -
            wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
         | 
| 91 | 
            -
              perturb_prob: 0.0
         | 
| 92 | 
            -
              drop_freq_prob: 1.0
         | 
| 93 | 
            -
              drop_chunk_prob: 1.0
         | 
| 94 | 
            -
              sample_rate: 8000
         | 
| 95 | 
            -
             | 
| 96 | 
            -
            # loss thresholding -- this thresholds the training loss
         | 
| 97 | 
            -
            threshold_byloss: true
         | 
| 98 | 
            -
            threshold: -30
         | 
| 99 | 
            -
             | 
| 100 | 
            -
            # Encoder parameters
         | 
| 101 | 
            -
            N_encoder_out: 256
         | 
| 102 | 
            -
            out_channels: 256
         | 
| 103 | 
            -
            kernel_size: 16
         | 
| 104 | 
            -
            kernel_stride: 8
         | 
| 105 | 
            -
             | 
| 106 | 
            -
            # Dataloader options
         | 
| 107 | 
            -
            dataloader_opts:
         | 
| 108 | 
            -
              batch_size: 1
         | 
| 109 | 
            -
              num_workers: 0
         | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 112 | 
             
            # Specifying the network
         | 
| 113 |  | 
| 114 | 
             
            snrmin: 0
         | 
| 115 | 
             
            snrmax: 10
         | 
| 116 | 
            -
            out_n_neurons: 16
         | 
| 117 | 
             
            use_snr_compression: true
         | 
| 118 | 
             
            separation_norm_type: stnorm
         | 
| 119 |  | 
| 120 | 
            -
            # compute_features: !new:speechbrain.lobes.features.Fbank
         | 
| 121 | 
            -
            #     n_mels: !ref <n_mels>
         | 
| 122 | 
            -
            #     left_frames: 0
         | 
| 123 | 
            -
            #     right_frames: 0
         | 
| 124 | 
            -
            #     deltas: False
         | 
| 125 | 
            -
             | 
| 126 | 
             
            latent_dim: 128
         | 
| 127 | 
             
            n_inp: 256
         | 
| 128 | 
             
            encoder: &id006 !new:speechbrain.nnet.containers.Sequential
         | 
| @@ -169,26 +58,7 @@ encoder: &id006 !new:speechbrain.nnet.containers.Sequential | |
| 169 |  | 
| 170 | 
             
            stat_pooling: !new:speechbrain.nnet.pooling.StatisticsPooling
         | 
| 171 |  | 
| 172 | 
            -
             | 
| 173 | 
            -
                    # classifier_enc: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
         | 
| 174 | 
            -
                    #     input_size: !ref <n_inp>
         | 
| 175 | 
            -
                    #     channels: [1024, 1024, 1024, 1024, 3072]
         | 
| 176 | 
            -
                    #     kernel_sizes: [5, 3, 3, 3, 1]
         | 
| 177 | 
            -
                    #     dilations: [1, 2, 3, 4, 1]
         | 
| 178 | 
            -
                    #     attention_channels: 128
         | 
| 179 | 
            -
                    #     lin_neurons: 192
         | 
| 180 | 
            -
             | 
| 181 | 
            -
            #classifier_out: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
         | 
| 182 | 
            -
            #    input_size: 192
         | 
| 183 | 
            -
            #    out_neurons: !ref <out_n_neurons>
         | 
| 184 | 
            -
            #
         | 
| 185 | 
            -
            # classifier_out: !new:speechbrain.nnet.linear.Linear
         | 
| 186 | 
            -
            #     input_size: 256
         | 
| 187 | 
            -
            #     n_neurons: 1
         | 
| 188 | 
            -
             | 
| 189 | 
             
            encoder_out: &id007 !new:speechbrain.nnet.containers.Sequential
         | 
| 190 | 
            -
                    # lr_scheduler: !ref <lr_scheduler>
         | 
| 191 | 
            -
             | 
| 192 | 
             
              input_shape: [!!null '', 256]
         | 
| 193 | 
             
              layer1: !new:speechbrain.nnet.linear.Linear
         | 
| 194 | 
             
                input_size: 256
         | 
| @@ -199,38 +69,9 @@ encoder_out: &id007 !new:speechbrain.nnet.containers.Sequential | |
| 199 | 
             
                n_neurons: 1
         | 
| 200 | 
             
              sigm: !new:torch.nn.Sigmoid
         | 
| 201 |  | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
            classifier_loss: !new:torch.nn.CrossEntropyLoss
         | 
| 205 | 
            -
             | 
| 206 | 
            -
            optimizer: !name:torch.optim.Adam
         | 
| 207 | 
            -
              lr: 0.0001
         | 
| 208 | 
            -
              weight_decay: 0
         | 
| 209 | 
            -
             | 
| 210 | 
            -
            loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
         | 
| 211 | 
            -
             | 
| 212 | 
            -
            lr_scheduler: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
         | 
| 213 | 
            -
              factor: 0.5
         | 
| 214 | 
            -
              patience: 2
         | 
| 215 | 
            -
              dont_halve_until_epoch: 95
         | 
| 216 | 
            -
             | 
| 217 | 
            -
            epoch_counter: &id008 !new:speechbrain.utils.epoch_loop.EpochCounter
         | 
| 218 | 
            -
              limit: 200
         | 
| 219 | 
            -
             | 
| 220 | 
             
            modules:
         | 
| 221 | 
             
              encoder: *id006
         | 
| 222 | 
             
              encoder_out: *id007
         | 
| 223 | 
            -
            checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
         | 
| 224 | 
            -
              checkpoints_dir: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save
         | 
| 225 | 
            -
              recoverables:
         | 
| 226 | 
            -
                counter: *id008
         | 
| 227 | 
            -
                encoder: *id006
         | 
| 228 | 
            -
                encoder_out: *id007
         | 
| 229 | 
            -
            train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
         | 
| 230 | 
            -
              save_file: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/train_log.txt
         | 
| 231 | 
            -
             | 
| 232 | 
            -
            num_separators_per_model: 3
         | 
| 233 | 
            -
            separator_base_folder: /home/mila/s/subakany/speechbrain_new/recipes/WHAMandWHAMR/separation/results/
         | 
| 234 |  | 
| 235 | 
             
            pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
         | 
| 236 | 
             
                loadables:
         | 
|  | |
|  | |
|  | |
|  | |
| 1 | 
             
            # ################################
         | 
| 2 | 
            +
            # Model: Neural SI-SNR Estimator with Pool training strategy (https://arxiv.org/pdf/2110.10812.pdf)
         | 
| 3 | 
            +
            # Dataset : LibriMix and WHAMR!
         | 
|  | |
| 4 | 
             
            # ################################
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 5 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 6 | 
             
            sample_rate: 8000
         | 
| 7 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 8 | 
             
            # Specifying the network
         | 
| 9 |  | 
| 10 | 
             
            snrmin: 0
         | 
| 11 | 
             
            snrmax: 10
         | 
|  | |
| 12 | 
             
            use_snr_compression: true
         | 
| 13 | 
             
            separation_norm_type: stnorm
         | 
| 14 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 15 | 
             
            latent_dim: 128
         | 
| 16 | 
             
            n_inp: 256
         | 
| 17 | 
             
            encoder: &id006 !new:speechbrain.nnet.containers.Sequential
         | 
|  | |
| 58 |  | 
| 59 | 
             
            stat_pooling: !new:speechbrain.nnet.pooling.StatisticsPooling
         | 
| 60 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 61 | 
             
            encoder_out: &id007 !new:speechbrain.nnet.containers.Sequential
         | 
|  | |
|  | |
| 62 | 
             
              input_shape: [!!null '', 256]
         | 
| 63 | 
             
              layer1: !new:speechbrain.nnet.linear.Linear
         | 
| 64 | 
             
                input_size: 256
         | 
|  | |
| 69 | 
             
                n_neurons: 1
         | 
| 70 | 
             
              sigm: !new:torch.nn.Sigmoid
         | 
| 71 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 72 | 
             
            modules:
         | 
| 73 | 
             
              encoder: *id006
         | 
| 74 | 
             
              encoder_out: *id007
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 75 |  | 
| 76 | 
             
            pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
         | 
| 77 | 
             
                loadables:
         | 
    	
        hyperparams_training.yaml
    ADDED
    
    | @@ -0,0 +1,234 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # ################################
         | 
| 2 | 
            +
            # Model: Neural SI-SNR Estimator with Pool training strategy (https://arxiv.org/pdf/2110.10812.pdf)
         | 
| 3 | 
            +
            # Dataset : LibriMix and WHAMR!
         | 
| 4 | 
            +
            # ################################
         | 
| 5 | 
            +
            #
         | 
| 6 | 
            +
            # Basic parameters
         | 
| 7 | 
            +
            # Seed needs to be set at top of yaml, before objects with parameters are made
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            seed: 1234
         | 
| 10 | 
            +
            __set_seed: !apply:torch.manual_seed [1234]
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            # Data params
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            # e.g. '/yourpath/Libri2Mix/'
         | 
| 15 | 
            +
            # (in the original WSJ0Mix recipe: end with 2speakers for wsj0-2mix or 3speakers for wsj0-3mix)
         | 
| 16 | 
            +
            data_folder: /miniscratch/subakany/LibriMixData_new/Libri2Mix/
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            # the path for wsj0/si_tr_s/ folder -- only needed if dynamic mixing is used
         | 
| 19 | 
            +
            # e.g. /yourpath/wsj0-processed/si_tr_s/
         | 
| 20 | 
            +
            # you need to convert the original wsj0 to 8k
         | 
| 21 | 
            +
            # you can do this conversion with the script ../meta/preprocess_dynamic_mixing.py
         | 
| 22 | 
            +
            base_folder_dm: /miniscratch/subakany/LibriMixData_new/LibriSpeech/train-clean-360_processed/
         | 
| 23 | 
            +
            rir_path: /miniscratch/subakany/whamr_rirs_wav
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            experiment_name: snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators
         | 
| 26 | 
            +
            output_folder: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234
         | 
| 27 | 
            +
            train_log: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/train_log.txt
         | 
| 28 | 
            +
            save_folder: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save
         | 
| 29 | 
            +
            train_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/libri2mix_train-360.csv
         | 
| 30 | 
            +
            valid_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/libri2mix_dev.csv
         | 
| 31 | 
            +
            test_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/libri2mix_test.csv
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            wsj_data_folder: /network/tmp1/subakany/wham_original
         | 
| 34 | 
            +
            train_wsj_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/wham_tr.csv
         | 
| 35 | 
            +
            test_wsj_data: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save/wham_tt.csv
         | 
| 36 | 
            +
            base_folder_dm_whamr: /network/tmp1/subakany/wsj0-processed/si_tr_s
         | 
| 37 | 
            +
            use_whamr_train: true
         | 
| 38 | 
            +
            whamr_proportion: 0.6
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            test_onwsj: false
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            skip_prep: false
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            ckpt_interval_minutes: 60
         | 
| 45 | 
            +
             | 
| 46 | 
            +
            # Experiment params
         | 
| 47 | 
            +
            auto_mix_prec: false # Set it to True for mixed precision
         | 
| 48 | 
            +
            test_only: false
         | 
| 49 | 
            +
            num_spks: 2 # set to 3 for wsj0-3mix
         | 
| 50 | 
            +
            progressbar: true
         | 
| 51 | 
            +
            save_audio: false # Save estimated sources on disk
         | 
| 52 | 
            +
            sample_rate: 8000
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            # Training parameters
         | 
| 55 | 
            +
            N_epochs: 200
         | 
| 56 | 
            +
            batch_size: 1
         | 
| 57 | 
            +
            lr: 0.0001
         | 
| 58 | 
            +
            clip_grad_norm: 5
         | 
| 59 | 
            +
            loss_upper_lim: 999999  # this is the upper limit for an acceptable loss
         | 
| 60 | 
            +
            # if True, the training sequences are cut to a specified length
         | 
| 61 | 
            +
            limit_training_signal_len: false
         | 
| 62 | 
            +
            # this is the length of sequences if we choose to limit
         | 
| 63 | 
            +
            # the signal length of training sequences
         | 
| 64 | 
            +
            training_signal_len: 32000000
         | 
| 65 | 
            +
             | 
| 66 | 
            +
            # Set it to True to dynamically create mixtures at training time
         | 
| 67 | 
            +
            dynamic_mixing: true
         | 
| 68 | 
            +
            use_wham_noise: true
         | 
| 69 | 
            +
            use_reverb_augment: true
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            # Parameters for data augmentation
         | 
| 72 | 
            +
            use_wavedrop: false
         | 
| 73 | 
            +
            use_speedperturb: true
         | 
| 74 | 
            +
            use_speedperturb_sameforeachsource: false
         | 
| 75 | 
            +
            use_rand_shift: false
         | 
| 76 | 
            +
            min_shift: -8000
         | 
| 77 | 
            +
            max_shift: 8000
         | 
| 78 | 
            +
             | 
| 79 | 
            +
            speedperturb: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
         | 
| 80 | 
            +
              perturb_prob: 1.0
         | 
| 81 | 
            +
              drop_freq_prob: 0.0
         | 
| 82 | 
            +
              drop_chunk_prob: 0.0
         | 
| 83 | 
            +
              sample_rate: 8000
         | 
| 84 | 
            +
              speeds: [95, 100, 105]
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
         | 
| 87 | 
            +
              perturb_prob: 0.0
         | 
| 88 | 
            +
              drop_freq_prob: 1.0
         | 
| 89 | 
            +
              drop_chunk_prob: 1.0
         | 
| 90 | 
            +
              sample_rate: 8000
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            # loss thresholding -- this thresholds the training loss
         | 
| 93 | 
            +
            threshold_byloss: true
         | 
| 94 | 
            +
            threshold: -30
         | 
| 95 | 
            +
             | 
| 96 | 
            +
            # Encoder parameters
         | 
| 97 | 
            +
            N_encoder_out: 256
         | 
| 98 | 
            +
            out_channels: 256
         | 
| 99 | 
            +
            kernel_size: 16
         | 
| 100 | 
            +
            kernel_stride: 8
         | 
| 101 | 
            +
             | 
| 102 | 
            +
            # Dataloader options
         | 
| 103 | 
            +
            dataloader_opts:
         | 
| 104 | 
            +
              batch_size: 1
         | 
| 105 | 
            +
              num_workers: 0
         | 
| 106 | 
            +
             | 
| 107 | 
            +
             | 
| 108 | 
            +
            # Specifying the network
         | 
| 109 | 
            +
             | 
| 110 | 
            +
            snrmin: 0
         | 
| 111 | 
            +
            snrmax: 10
         | 
| 112 | 
            +
            out_n_neurons: 16
         | 
| 113 | 
            +
            use_snr_compression: true
         | 
| 114 | 
            +
            separation_norm_type: stnorm
         | 
| 115 | 
            +
             | 
| 116 | 
            +
            # compute_features: !new:speechbrain.lobes.features.Fbank
         | 
| 117 | 
            +
            #     n_mels: !ref <n_mels>
         | 
| 118 | 
            +
            #     left_frames: 0
         | 
| 119 | 
            +
            #     right_frames: 0
         | 
| 120 | 
            +
            #     deltas: False
         | 
| 121 | 
            +
             | 
| 122 | 
            +
            latent_dim: 128
         | 
| 123 | 
            +
            n_inp: 256
         | 
| 124 | 
            +
            encoder: &id006 !new:speechbrain.nnet.containers.Sequential
         | 
| 125 | 
            +
              input_shape: [!!null '', 2, !!null '']
         | 
| 126 | 
            +
              cnn1: !new:speechbrain.nnet.CNN.Conv1d
         | 
| 127 | 
            +
                in_channels: 2
         | 
| 128 | 
            +
                kernel_size: 4
         | 
| 129 | 
            +
                out_channels: 128
         | 
| 130 | 
            +
                stride: 1
         | 
| 131 | 
            +
                skip_transpose: true
         | 
| 132 | 
            +
                padding: valid
         | 
| 133 | 
            +
              relu1: !new:torch.nn.ReLU
         | 
| 134 | 
            +
              cnn2: !new:speechbrain.nnet.CNN.Conv1d
         | 
| 135 | 
            +
                in_channels: 128
         | 
| 136 | 
            +
                kernel_size: 4
         | 
| 137 | 
            +
                out_channels: 128
         | 
| 138 | 
            +
                stride: 2
         | 
| 139 | 
            +
                skip_transpose: true
         | 
| 140 | 
            +
                padding: valid
         | 
| 141 | 
            +
              relu2: !new:torch.nn.ReLU
         | 
| 142 | 
            +
              cnn3: !new:speechbrain.nnet.CNN.Conv1d
         | 
| 143 | 
            +
                in_channels: 128
         | 
| 144 | 
            +
                kernel_size: 4
         | 
| 145 | 
            +
                out_channels: 128
         | 
| 146 | 
            +
                stride: 2
         | 
| 147 | 
            +
                skip_transpose: true
         | 
| 148 | 
            +
                padding: valid
         | 
| 149 | 
            +
              relu3: !new:torch.nn.ReLU
         | 
| 150 | 
            +
              cnn4: !new:speechbrain.nnet.CNN.Conv1d
         | 
| 151 | 
            +
                in_channels: 128
         | 
| 152 | 
            +
                kernel_size: 4
         | 
| 153 | 
            +
                out_channels: 128
         | 
| 154 | 
            +
                stride: 2
         | 
| 155 | 
            +
                skip_transpose: true
         | 
| 156 | 
            +
                padding: valid
         | 
| 157 | 
            +
              relu4: !new:torch.nn.ReLU
         | 
| 158 | 
            +
              cnn5: !new:speechbrain.nnet.CNN.Conv1d
         | 
| 159 | 
            +
                in_channels: 128
         | 
| 160 | 
            +
                kernel_size: 4
         | 
| 161 | 
            +
                out_channels: 128
         | 
| 162 | 
            +
                stride: 2
         | 
| 163 | 
            +
                skip_transpose: true
         | 
| 164 | 
            +
                padding: valid
         | 
| 165 | 
            +
             | 
| 166 | 
            +
            # Statistics pooling layer, instantiated with its default arguments (none are set here).
            # NOTE(review): the conv stack above emits 128 channels while encoder_out expects 256
            # inputs, which is consistent with pooling concatenating mean and std -- confirm.
            stat_pooling: !new:speechbrain.nnet.pooling.StatisticsPooling
         | 
| 167 | 
            +
             | 
| 168 | 
            +
             | 
| 169 | 
            +
                    # classifier_enc: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
         | 
| 170 | 
            +
                    #     input_size: !ref <n_inp>
         | 
| 171 | 
            +
                    #     channels: [1024, 1024, 1024, 1024, 3072]
         | 
| 172 | 
            +
                    #     kernel_sizes: [5, 3, 3, 3, 1]
         | 
| 173 | 
            +
                    #     dilations: [1, 2, 3, 4, 1]
         | 
| 174 | 
            +
                    #     attention_channels: 128
         | 
| 175 | 
            +
                    #     lin_neurons: 192
         | 
| 176 | 
            +
             | 
| 177 | 
            +
            #classifier_out: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
         | 
| 178 | 
            +
            #    input_size: 192
         | 
| 179 | 
            +
            #    out_neurons: !ref <out_n_neurons>
         | 
| 180 | 
            +
            #
         | 
| 181 | 
            +
            # classifier_out: !new:speechbrain.nnet.linear.Linear
         | 
| 182 | 
            +
            #     input_size: 256
         | 
| 183 | 
            +
            #     n_neurons: 1
         | 
| 184 | 
            +
             | 
| 185 | 
            +
            # Output head, anchored as &id007 so that `modules` and the checkpointer's
            # `recoverables` can alias this same object.
            # Layers: Linear(256 -> 256), ReLU, Linear(256 -> 1), Sigmoid, so the output lies in (0, 1).
            encoder_out: &id007 !new:speechbrain.nnet.containers.Sequential
         | 
| 186 | 
            +
                    # (commented out; not an argument of this Sequential) lr_scheduler: !ref <lr_scheduler>
         | 
| 187 | 
            +
             | 
| 188 | 
            +
              input_shape: [!!null '', 256]
         | 
| 189 | 
            +
              layer1: !new:speechbrain.nnet.linear.Linear
         | 
| 190 | 
            +
                input_size: 256
         | 
| 191 | 
            +
                n_neurons: 256
         | 
| 192 | 
            +
              relu: !new:torch.nn.ReLU
         | 
| 193 | 
            +
              layer2: !new:speechbrain.nnet.linear.Linear
         | 
| 194 | 
            +
                input_size: 256
         | 
| 195 | 
            +
                n_neurons: 1
         | 
| 196 | 
            +
              sigm: !new:torch.nn.Sigmoid
         | 
| 197 | 
            +
             | 
| 198 | 
            +
             | 
| 199 | 
            +
             | 
| 200 | 
            +
            # Instantiates torch.nn.CrossEntropyLoss with default arguments.
            # NOTE(review): encoder_out ends in a single sigmoid unit and `loss` below is SI-SNR
            # based; confirm the training script still uses this entry.
            classifier_loss: !new:torch.nn.CrossEntropyLoss
         | 
| 201 | 
            +
             | 
| 202 | 
            +
            # `!name:` binds lr / weight_decay to torch.optim.Adam without instantiating it;
            # the parameters to optimize are supplied later by the caller (not shown in this file).
            optimizer: !name:torch.optim.Adam
         | 
| 203 | 
            +
              lr: 0.0001
         | 
| 204 | 
            +
              weight_decay: 0
         | 
| 205 | 
            +
             | 
| 206 | 
            +
            # Reference (via `!name:`, i.e. a callable rather than an instance) to SpeechBrain's
            # SI-SNR loss wrapped with permutation-invariant training.
            loss: !name:speechbrain.nnet.losses.get_si_snr_with_pitwrapper
         | 
| 207 | 
            +
             | 
| 208 | 
            +
            # Reduce-on-plateau schedule: the learning rate is multiplied by `factor` once the
            # monitored loss has failed to improve for `patience` epochs; no reduction is applied
            # before epoch `dont_halve_until_epoch`.
            lr_scheduler: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
         | 
| 209 | 
            +
              factor: 0.5
         | 
| 210 | 
            +
              patience: 2
         | 
| 211 | 
            +
              dont_halve_until_epoch: 95
         | 
| 212 | 
            +
             | 
| 213 | 
            +
            # Epoch counter capped at `limit` epochs; anchored as &id008 so that the
            # checkpointer's `recoverables.counter` refers to this same object.
            epoch_counter: &id008 !new:speechbrain.utils.epoch_loop.EpochCounter
         | 
| 214 | 
            +
              limit: 200
         | 
| 215 | 
            +
             | 
| 216 | 
            +
            # Name -> module mapping. Both values are YAML aliases (*id006, *id007) to objects
            # anchored earlier in this file, so no new instances are created here.
            modules:
         | 
| 217 | 
            +
              encoder: *id006
         | 
| 218 | 
            +
              encoder_out: *id007
         | 
| 219 | 
            +
            # Checkpointer writing under `checkpoints_dir`; `recoverables` lists the objects it
            # saves and restores (epoch counter, encoder, encoder_out), all given as aliases.
            checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
         | 
| 220 | 
            +
              checkpoints_dir: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/save
         | 
| 221 | 
            +
              recoverables:
         | 
| 222 | 
            +
                counter: *id008
         | 
| 223 | 
            +
                encoder: *id006
         | 
| 224 | 
            +
                encoder_out: *id007
         | 
| 225 | 
            +
            # File-based training logger; its output file is given by `save_file`.
            train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
         | 
| 226 | 
            +
              save_file: results/snrtrain-timedomain-sbpooling-wwhamr-lessstride-stnorm-manyseparators/1234/train_log.txt
         | 
| 227 | 
            +
             | 
| 228 | 
            +
            # NOTE(review): consumed by the training script (not shown in this file); presumably
            # the number of separator checkpoints used per separation model -- confirm.
            num_separators_per_model: 3
         | 
| 229 | 
            +
            # NOTE(review): machine-specific absolute path under a user's home directory;
            # override it for your own environment before running.
            separator_base_folder: /home/mila/s/subakany/speechbrain_new/recipes/WHAMandWHAMR/separation/results/
         | 
| 230 | 
            +
             | 
| 231 | 
            +
            # Pretrainer whose `loadables` point (via `!ref`) at the `encoder` and `encoder_out`
            # entries of this file, i.e. the objects that receive pretrained parameters.
            pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
         | 
| 232 | 
            +
                loadables:
         | 
| 233 | 
            +
                    encoder: !ref <encoder>
         | 
| 234 | 
            +
                    encoder_out: !ref <encoder_out>
         | 

