sahita
/

lang-VoxLingua107-ecapa

@@ -1,40 +1,6 @@
-# Generated 2022-09-22 from:
-# /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
-# yamllint disable
-################################
-# Model: language identification with ECAPA
-# Authors: Tanel Alumäe, 2021
-# ################################
-# Basic parameters
-seed: 1988
-__set_seed: !apply:torch.manual_seed [1988]
-output_folder: results/epaca/1988
-save_folder: results/epaca/1988/save
-train_log: results/epaca/1988/train_log.txt
-data_folder: ./
-rir_folder: ./
-shards_url: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards
-train_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/meta.json
-val_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/meta.json
-train_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/shard-{000000..000009}.tar
-val_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/shard-000000.tar
-# Set to a directory on a large disk if you are training on WebDataset shards hosted on the web
-#shard_cache_dir:
-ckpt_interval_minutes: 5
-# Training parameters
-number_of_epochs: 1
-lr: 0.001
-lr_final: 0.0001
-sample_rate: 16000
-sentence_len: 3 # seconds
 # Feature parameters
 n_mels: 60
 left_frames: 0
@@ -44,22 +10,13 @@ deltas: false
 # Number of languages
 out_n_neurons: 2
-train_dataloader_options:
-  num_workers: 2
-  batch_size: 128
-val_dataloader_options:
-  num_workers: 0
-  batch_size: 32
-# Functions
-compute_features: &id003 !new:speechbrain.lobes.features.Fbank
   n_mels: 60
   left_frames: 0
   right_frames: 0
   deltas: false
-embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
   input_size: 60
   channels: [1024, 1024, 1024, 1024, 3072]
   kernel_sizes: [5, 3, 3, 3, 1]
@@ -67,76 +24,35 @@ embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
   attention_channels: 128
   lin_neurons: 256
-classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
   input_shape: [null, null, 256]
   activation: !name:torch.nn.LeakyReLU
   lin_blocks: 1
   lin_neurons: 512
   out_neurons: 2
-epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
-  limit: 1
-augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
-  sample_rate: 16000
-  speeds: [90, 100, 110]
-add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
-  openrir_folder: ./
-  openrir_max_noise_len: 3.0    # seconds
-  reverb_prob: 0.5
-  noise_prob: 0.8
-  noise_snr_low: 0
-  noise_snr_high: 15
-  rir_scale_factor: 1.0
-# Definition of the augmentation pipeline.
-# If concat_augment = False, the augmentation techniques are applied
-# in sequence. If concat_augment = True, all the augmented signals
-# are concatenated in a single big batch.
-augment_pipeline: [*id001, *id002]
-concat_augment: false
-mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
   norm_type: sentence
   std_norm: false
 modules:
-  compute_features: *id003
-  augment_speed: *id001
-  add_rev_noise: *id002
-  embedding_model: *id004
-  classifier: *id005
-  mean_var_norm: *id006
-compute_cost: !name:speechbrain.nnet.losses.nll_loss
-# compute_error: !name:speechbrain.nnet.losses.classification_error
-opt_class: !name:torch.optim.Adam
-  lr: 0.001
-  weight_decay: 0.000002
-lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
-  initial_value: 0.001
-  final_value: 0.0001
-  epoch_count: 1
-# Logging + checkpoints
-train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
-  save_file: results/epaca/1988/train_log.txt
-error_stats: !name:speechbrain.utils.metric_stats.MetricStats
-  metric: !name:speechbrain.nnet.losses.classification_error
-    reduction: batch
-checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
-  checkpoints_dir: results/epaca/1988/save
-  recoverables:
-    embedding_model: *id004
-    classifier: *id005
-    normalizer: *id006
-    counter: *id007

+pretrained_path: sahita/lang-VoxLingua107-ecapa
 # Feature parameters
 n_mels: 60
 left_frames: 0
 # Number of languages
 out_n_neurons: 2
+compute_features: !new:speechbrain.lobes.features.Fbank
   n_mels: 60
   left_frames: 0
   right_frames: 0
   deltas: false
+embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
   input_size: 60
   channels: [1024, 1024, 1024, 1024, 3072]
   kernel_sizes: [5, 3, 3, 3, 1]
   attention_channels: 128
   lin_neurons: 256
+classifier: !new:speechbrain.lobes.models.Xvector.Classifier
   input_shape: [null, null, 256]
   activation: !name:torch.nn.LeakyReLU
   lin_blocks: 1
   lin_neurons: 512
   out_neurons: 2
+mean_var_norm: !new:speechbrain.processing.features.InputNormalization
   norm_type: sentence
   std_norm: false
 modules:
+  compute_features: !ref <compute_features>
+  embedding_model: !ref <embedding_model>
+  classifier: !ref <classifier>
+  mean_var_norm: !ref <mean_var_norm>
+# compute_error: !name:speechbrain.nnet.losses.classification_error
+label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
+pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
+    loadables:
+        embedding_model: !ref <embedding_model>
+        classifier: !ref <classifier>
+        label_encoder: !ref <label_encoder>
+    paths:
+        embedding_model: !ref <pretrained_path>/embedding_model.ckpt
+        classifier: !ref <pretrained_path>/classifier.ckpt
+        label_encoder: !ref <pretrained_path>/label_encoder.txt