cemsubakan committed on
Commit
b665af3
1 Parent(s): a0508f3

Update hyperparams.yaml

Browse files
Files changed (1) hide show
  1. hyperparams.yaml +7 -88
hyperparams.yaml CHANGED
@@ -1,6 +1,3 @@
1
- # Generated 2023-07-14 from:
2
- # /data2/cloned_repos/speechbrain-clone/recipes/ESC50/interpret/hparams/piq.yaml
3
- # yamllint disable
4
  # #################################
5
  # The recipe for training PIQ on the ESC50 dataset.
6
  #
@@ -10,47 +7,6 @@
10
  # (based on the SpeechBrain UrbanSound8k recipe)
11
  # #################################
12
 
13
- # Seed needs to be set at top of yaml, before objects with parameters are made
14
- seed: 1234
15
- __set_seed: !!python/object/apply:torch.manual_seed [1234]
16
-
17
- # Set up folders for reading from and writing to
18
- # Dataset must already exist at `audio_data_folder`
19
- data_folder: /data2/ESC-50-master
20
- # e.g., /localscratch/UrbanSound8K
21
- audio_data_folder: /data2/ESC-50-master/audio
22
-
23
- experiment_name: piq
24
- output_folder: ./results/piq/1234
25
- save_folder: ./results/piq/1234/save
26
- train_log: ./results/piq/1234/train_log.txt
27
-
28
- test_only: false
29
- save_interpretations: true
30
- interpret_period: 10
31
-
32
- # Tensorboard logs
33
- use_tensorboard: false
34
- tensorboard_logs_folder: ./results/piq/1234/tb_logs/
35
-
36
- # Path where data manifest files will be stored
37
- train_annotation: /data2/ESC-50-master/manifest/train.json
38
- valid_annotation: /data2/ESC-50-master/manifest/valid.json
39
- test_annotation: /data2/ESC-50-master/manifest/test.json
40
-
41
- # To standardize results, UrbanSound8k has pre-separated samples into
42
- # 10 folds for multi-fold validation
43
- train_fold_nums: [1, 2, 3]
44
- valid_fold_nums: [4]
45
- test_fold_nums: [5]
46
- skip_manifest_creation: false
47
-
48
- ckpt_interval_minutes: 15 # save checkpoint every N min
49
-
50
- # Training parameters
51
- number_of_epochs: 200
52
- batch_size: 16
53
- lr: 0.0002
54
  sample_rate: 16000
55
  use_vq: true
56
  rec_loss_coef: 1
@@ -65,43 +21,6 @@ n_mels: 80
65
  # Number of classes
66
  out_n_neurons: 50
67
 
68
- shuffle: true
69
- dataloader_options:
70
- batch_size: 16
71
- shuffle: true
72
- num_workers: 0
73
-
74
- epoch_counter: &id001 !new:speechbrain.utils.epoch_loop.EpochCounter
75
-
76
- limit: 200
77
-
78
- opt_class: !name:torch.optim.Adam
79
- lr: 0.0002
80
- weight_decay: 0.000002
81
-
82
- lr_annealing: !new:speechbrain.nnet.schedulers.ReduceLROnPlateau
83
- factor: 0.5
84
- patience: 3
85
- dont_halve_until_epoch: 100
86
-
87
- # Logging + checkpoints
88
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
89
- save_file: ./results/piq/1234/train_log.txt
90
-
91
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
92
- checkpoints_dir: ./results/piq/1234/save
93
- recoverables:
94
- psi_model: &id004 !new:speechbrain.lobes.models.PIQ.VectorQuantizedPSI_Audio
95
- dim: 256
96
- K: 1024
97
- shared_keys: 0
98
- activate_class_partitioning: true
99
- use_adapter: true
100
- adapter_reduce_dim: true
101
-
102
- counter: *id001
103
- use_pretrained: true
104
-
105
  # embedding_model: !new:custom_models.Conv2dEncoder_v2
106
  embedding_model: &id002 !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
107
  dim: 256
@@ -111,7 +30,6 @@ classifier: &id003 !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
111
  out_neurons: 50
112
  lin_blocks: 1
113
 
114
-
115
  # Interpretation hyperparams
116
  K: 1024
117
 
@@ -138,11 +56,13 @@ compute_istft: &id007 !new:speechbrain.processing.features.ISTFT
138
 
139
  label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
140
  psi_model: *id004
 
 
141
  modules:
142
- compute_stft: *id005
143
- compute_fbank: *id006
144
- compute_istft: *id007
145
- psi: *id004
146
  embedding_model: !ref <embedding_model>
147
  classifier: !ref <classifier>
148
 
@@ -156,5 +76,4 @@ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
156
  embedding_model: speechbrain/PIQ-ESC50/embedding_modelft.ckpt
157
  classifier: speechbrain/PIQ-ESC50/classifier.ckpt
158
  psi: speechbrain/PIQ-ESC50/psi_model.ckpt
159
- label_encoder: speechbrain/cnn14-esc50/label_encoder.txt
160
-
 
 
 
 
1
  # #################################
2
  # The recipe for training PIQ on the ESC50 dataset.
3
  #
 
7
  # (based on the SpeechBrain UrbanSound8k recipe)
8
  # #################################
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  sample_rate: 16000
11
  use_vq: true
12
  rec_loss_coef: 1
 
21
  # Number of classes
22
  out_n_neurons: 50
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # embedding_model: !new:custom_models.Conv2dEncoder_v2
25
  embedding_model: &id002 !new:speechbrain.lobes.models.PIQ.Conv2dEncoder_v2
26
  dim: 256
 
30
  out_neurons: 50
31
  lin_blocks: 1
32
 
 
33
  # Interpretation hyperparams
34
  K: 1024
35
 
 
56
 
57
  label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
58
  psi_model: *id004
59
+
60
+
61
  modules:
62
+ compute_stft: !ref <compute_stft>
63
+ compute_fbank: !ref <compute_fbank>
64
+ compute_istft: !ref <compute_istft>
65
+ psi: !ref <psi_model>
66
  embedding_model: !ref <embedding_model>
67
  classifier: !ref <classifier>
68
 
 
76
  embedding_model: speechbrain/PIQ-ESC50/embedding_modelft.ckpt
77
  classifier: speechbrain/PIQ-ESC50/classifier.ckpt
78
  psi: speechbrain/PIQ-ESC50/psi_model.ckpt
79
+ label_encoder: speechbrain/cnn14-esc50/label_encoder.txt