# Generated 2022-09-26 from:
# /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
# yamllint disable
# ################################
# Model: language identification with ECAPA
# Authors: Tanel Alumäe, 2021
# ################################
# NOTE: HyperPyYAML file — custom tags (!new:, !name:, !apply:) and the
# &idNNN anchors / *idNNN aliases below are resolved by the SpeechBrain
# loader; anchors must stay defined before their first alias use.
# Basic parameters
seed: 1988
# !apply: runs torch.manual_seed(1988) once, when this file is loaded.
__set_seed: !apply:torch.manual_seed [1988]
output_folder: results/epaca/1988
save_folder: results/epaca/1988/save
train_log: results/epaca/1988/train_log.txt
data_folder: ./
rir_folder: ./
# WebDataset shard locations (local paths here; may also be URLs).
shards_url: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards
train_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/meta.json
val_meta: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/meta.json
train_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/train/shard-{000000..000009}.tar
val_shards: /opt/speechbrain_LID/recipes/VoxLingua107/lang_id/data_shards/dev/shard-000000.tar
# Set to directory on a large disk if you are training on Webdataset shards hosted on the web
#shard_cache_dir:
# Save a recoverable checkpoint every N minutes during training.
ckpt_interval_minutes: 5
# Training parameters
number_of_epochs: 40
lr: 0.001
lr_final: 0.0001
sample_rate: 16000
sentence_len: 3 # seconds
# Feature parameters
n_mels: 60
left_frames: 0
right_frames: 0
deltas: false
# Number of languages
# (2 here — presumably a reduced/debug setup; the full VoxLingua107
# recipe targets 107 languages. TODO confirm against the shard metadata.)
out_n_neurons: 2
train_dataloader_options:
  num_workers: 2
  batch_size: 128
val_dataloader_options:
  num_workers: 0
  batch_size: 32
# Functions
compute_features: &id003 !new:speechbrain.lobes.features.Fbank
  n_mels: 60
  left_frames: 0
  right_frames: 0
  deltas: false
embedding_model: &id004 !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
  input_size: 60
  channels: [1024, 1024, 1024, 1024, 3072]
  kernel_sizes: [5, 3, 3, 3, 1]
  dilations: [1, 2, 3, 4, 1]
  attention_channels: 128
  lin_neurons: 256
classifier: &id005 !new:speechbrain.lobes.models.Xvector.Classifier
  input_shape: [null, null, 256]
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: 512
  out_neurons: 2
epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
  limit: 40
# Speed perturbation; values are speed factors in percent
# (100 = unchanged) per SpeechBrain's TimeDomainSpecAugment convention.
augment_speed: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
  sample_rate: 16000
  speeds: [90, 100, 110]
# Reverberation + additive noise corruption (OpenRIR-style folder).
add_rev_noise: &id002 !new:speechbrain.lobes.augment.EnvCorrupt
  openrir_folder: ./
  openrir_max_noise_len: 3.0 # seconds
  reverb_prob: 0.5
  noise_prob: 0.8
  noise_snr_low: 0
  noise_snr_high: 15
  rir_scale_factor: 1.0
# Definition of the augmentation pipeline.
# If concat_augment = False, the augmentation techniques are applied
# in sequence. If concat_augment = True, all the augmented signals
# are concatenated in a single big batch.
augment_pipeline: [*id001, *id002]
concat_augment: false
mean_var_norm: &id006 !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false
# Modules dict handed to the Brain class; values alias the objects above.
modules:
  compute_features: *id003
  augment_speed: *id001
  add_rev_noise: *id002
  embedding_model: *id004
  classifier: *id005
  mean_var_norm: *id006
compute_cost: !name:speechbrain.nnet.losses.nll_loss
# compute_error: !name:speechbrain.nnet.losses.classification_error
opt_class: !name:torch.optim.Adam
  lr: 0.001
  weight_decay: 0.000002
# Linear LR decay from lr to lr_final over number_of_epochs.
lr_annealing: !new:speechbrain.nnet.schedulers.LinearScheduler
  initial_value: 0.001
  final_value: 0.0001
  epoch_count: 40
# Logging + checkpoints
train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
  save_file: results/epaca/1988/train_log.txt
error_stats: !name:speechbrain.utils.metric_stats.MetricStats
  metric: !name:speechbrain.nnet.losses.classification_error
  reduction: batch
# Checkpointer persists/restores the recoverables listed below.
checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
  checkpoints_dir: results/epaca/1988/save
  recoverables:
    embedding_model: *id004
    classifier: *id005
    normalizer: *id006
    counter: *id007