File size: 1,585 Bytes
c98c327
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# Downstream expert configuration, split into `datarc` (data and decoding
# settings) and `modelrc` (model settings). String values are quoted so the
# parsed type is explicit; short leaf lists use flow style.
downstream_expert:
  datarc:
    batch_size: 32
    # NOTE(review): machine-specific absolute path; adjust per environment.
    bucket_file: '/home/leo/d/datasets/LibriSpeech/len_for_bucket'
    decoder_args:
      beam: 5
      beam_threshold: 25
      criterion: 'ctc'
      # This is the literal string "None", not a YAML null (`null` / `~`).
      decoder_type: 'None'
      # Placeholder paths; presumably only read when a decoder is enabled.
      # TODO confirm against the consumer.
      kenlm_model: '/path/to/KenLM'
      lexicon: '/path/to/4-gram.arpa'
      lm_weight: 2
      nbest: 1
      sil_weight: 0
      # A string, not a float: YAML spells negative infinity `-.inf`.
      # Presumably the consumer evaluates this expression. TODO confirm.
      unk_weight: '-math.inf'
      word_score: -1
    # Each split key maps to a list of split names.
    dev-clean: ['dev-clean']
    dev-other: ['dev-other']
    dict_path: './downstream/asr/char.dict'
    eval_batch_size: 1
    # NOTE(review): machine-specific absolute path; adjust per environment.
    libri_root: '/home/leo/d/datasets/LibriSpeech'
    num_workers: 12
    test-clean: ['test-clean']
    test-other: ['test-other']
    train: ['train-clean-100']
    train_batch_size: 32
    zero_infinity: true
  modelrc:
    RNNs:
      bidirection: true
      # The list-valued keys below all have two entries; presumably one entry
      # per stacked layer. TODO confirm.
      dim: [1024, 1024]
      dropout: [0.2, 0.2]
      layer_norm: [false, false]
      module: 'LSTM'
      proj: [false, false]
      sample_rate: [1, 1]
      sample_style: 'concat'
      total_rate: -1
    Wav2Letter:
      total_rate: 320
    project_dim: 1024
    # The value matches the `RNNs` section name above; presumably it picks
    # which model section is used. TODO confirm.
    select: 'RNNs'
# Optimizer settings. String values are quoted so their type is explicit.
optimizer:
  lr: 0.0001
  name: 'TorchOptim'
  # Presumably the name of a class in `torch.optim`. TODO confirm.
  torch_optim_name: 'Adam'
# Runner settings: evaluation targets plus step-based schedule values.
runner:
  # The entry matches the `dev-clean` key under `downstream_expert.datarc`.
  eval_dataloaders: ['dev-clean']
  # NOTE(review): eval_step (2000) and log_step (100) are both larger than
  # total_steps (10). This looks like a short smoke-test run; confirm the
  # schedule is intended before reusing this file for real training.
  eval_step: 2000
  gradient_accumulate_steps: 1
  gradient_clipping: 1
  log_step: 100
  max_keep: 1
  save_step: 5
  total_steps: 10
# Augmentation settings: three on/off switches (frequency mask, time mask,
# time warp) followed by their numeric parameters.
specaug:
  apply_freq_mask: true
  apply_time_mask: true
  apply_time_warp: true
  # Two-element range; presumably [min, max] mask width. TODO confirm.
  freq_mask_width_range: [0, 50]
  num_freq_mask: 4
  num_time_mask: 2
  # Two-element range; presumably [min, max] mask width. TODO confirm.
  time_mask_width_range: [0, 40]
  time_warp_window: 5