# File size: 3,062 Bytes
# e38a896
# Run-level settings. How each key is consumed is decided by the training
# entry point, which is not part of this file.
task_name: train
run_name: merged_ja004_ja005
# Single tag, identical to run_name.
tags: [merged_ja004_ja005]
# Both stage switches are enabled.
train: true
test: true
# Absolute path under /root, so it only exists on a machine laid out the
# same way.
# NOTE(review): presumably the checkpoint to start/resume from - confirm.
ckpt_path: /root/.local/share/matcha_tts/matcha_ljspeech.ckpt
# Top-level seed; `data.seed` holds a separate value (3000).
seed: 1234
# Data module settings; `_target_` names the class these keys are written for.
data:
  _target_: matcha.data.text_mel_datamodule.TextMelDataModule
  # NOTE(review): `name` is ja006 while run_name and the filelist directory
  # both say merged_ja004_ja005 - confirm this is intentional.
  name: ja006
  # Relative paths; what they are resolved against is not visible here.
  train_filelist_path: datas/merged_ja004_ja005/train.txt
  valid_filelist_path: datas/merged_ja004_ja005/valid.txt
  batch_size: 80
  num_workers: 1
  pin_memory: true
  cleaners: [basic_cleaners2]
  add_blank: true
  # Read by `model.n_spks` through `${data.n_spks}`.
  n_spks: 1
  # Audio analysis parameters.
  # NOTE(review): units (samples / Hz) are assumed, not stated here - confirm.
  n_fft: 1024
  n_feats: 80
  sample_rate: 22050
  hop_length: 256
  win_length: 1024
  f_min: 0
  f_max: 8000
  # Read by `model.data_statistics` through `${data.data_statistics}`.
  # NOTE(review): presumably computed from this dataset - confirm they match
  # the filelists above.
  data_statistics:
    mel_mean: -5.794878959655762
    mel_std: 2.2488205432891846
  # Separate from the top-level `seed` (1234).
  seed: 3000
# Model hyper-parameters for the class named in `_target_`.
# Values that have to agree with another section are written as `${...}`
# interpolations so that each of them has a single source of truth.
model:
  _target_: matcha.models.matcha_tts.MatchaTTS
  n_vocab: 178
  n_spks: ${data.n_spks}
  spk_emb_dim: 64
  # Tied to `data.n_feats` (currently 80) instead of repeating the literal,
  # so the data module and the model cannot silently disagree.
  n_feats: ${data.n_feats}
  data_statistics: ${data.data_statistics}
  out_size: null
  prior_loss: true
  encoder:
    encoder_type: RoPE Encoder
    encoder_params:
      n_feats: ${model.n_feats}
      n_channels: 192
      filter_channels: 768
      filter_channels_dp: 256
      n_heads: 2
      n_layers: 6
      kernel_size: 3
      p_dropout: 0.1
      # Tied to the model-level values (currently 64 and 1) rather than
      # duplicating them as literals.
      spk_emb_dim: ${model.spk_emb_dim}
      n_spks: ${model.n_spks}
      prenet: true
    # Reuses the encoder's duration-predictor width and dropout.
    duration_predictor_params:
      filter_channels_dp: ${model.encoder.encoder_params.filter_channels_dp}
      kernel_size: 3
      p_dropout: ${model.encoder.encoder_params.p_dropout}
  decoder:
    channels:
    - 256
    - 256
    dropout: 0.05
    attention_head_dim: 64
    n_blocks: 1
    num_mid_blocks: 2
    num_heads: 2
    act_fn: snakebeta
  cfm:
    name: CFM
    solver: euler
    sigma_min: 0.0001
  # `_partial_: true` asks Hydra for a partially-applied constructor, so the
  # remaining arguments are supplied later by the caller.
  # NOTE(review): presumably the model passes its parameters in - confirm.
  optimizer:
    _target_: torch.optim.Adam
    _partial_: true
    lr: 0.0001
    weight_decay: 0.0
# Lightning callbacks, one entry per callback; each `_target_` names the
# class to build.
callbacks:
  model_checkpoint:
    _target_: lightning.pytorch.callbacks.ModelCheckpoint
    # Checkpoints are written below the run's output directory
    # (`paths.output_dir`).
    dirpath: ${paths.output_dir}/checkpoints
    # With `auto_insert_metric_name: true` (below) Lightning prefixes the
    # value with its name, e.g. `checkpoint_epoch=010` (per the
    # ModelCheckpoint docs).
    filename: checkpoint_{epoch:03d}
    # Ranking by `epoch` with `mode: max` favours the newest checkpoints, so
    # in effect `save_top_k: 25` keeps the 25 most recent ones.
    monitor: epoch
    verbose: false
    save_last: true
    save_top_k: 25
    mode: max
    auto_insert_metric_name: true
    save_weights_only: false
    # Step- and time-based triggers are left unset; saving is epoch-based.
    every_n_train_steps: null
    train_time_interval: null
    every_n_epochs: 10
    save_on_train_epoch_end: null
  model_summary:
    _target_: lightning.pytorch.callbacks.RichModelSummary
    max_depth: 3
  rich_progress_bar:
    _target_: lightning.pytorch.callbacks.RichProgressBar
# Experiment loggers; only a TensorBoard logger is configured.
logger:
  tensorboard:
    _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
    # Event files go below the run's output directory (`paths.output_dir`).
    save_dir: ${paths.output_dir}/tensorboard/
    # No experiment name and an empty key prefix are set explicitly.
    name: null
    log_graph: false
    default_hp_metric: true
    prefix: ''
# Arguments for the Lightning Trainer named in `_target_`.
trainer:
  _target_: lightning.pytorch.trainer.Trainer
  default_root_dir: ${paths.output_dir}
  # -1 removes the epoch limit (per the Lightning Trainer docs), so fitting
  # only ends when it is stopped some other way.
  # NOTE(review): with top-level `test: true`, confirm the test stage is
  # actually reachable.
  max_epochs: -1
  accelerator: gpu
  # A list selects devices by index: here only GPU 0.
  devices: [0]
  precision: 16-mixed
  check_val_every_n_epoch: 1
  deterministic: false
  gradient_clip_val: 5.0
# Filesystem locations. Every value is an interpolation resolved at runtime.
paths:
  # Read from the PROJECT_ROOT environment variable. No default is given, so
  # resolving this fails when the variable is unset (per OmegaConf `oc.env`).
  root_dir: ${oc.env:PROJECT_ROOT}
  # Both derived from `root_dir`.
  data_dir: ${paths.root_dir}/data/
  log_dir: ${paths.root_dir}/logs/
  # Supplied by Hydra's runtime config. `output_dir` is what the callbacks,
  # logger and trainer sections point at.
  output_dir: ${hydra:runtime.output_dir}
  work_dir: ${hydra:runtime.cwd}
# Miscellaneous switches; the code that reads them is not part of this file.
# NOTE(review): meanings below are inferred from the key names - confirm
# against the training entry point.
extras:
  # Presumably leaves Python warnings visible.
  ignore_warnings: false
  # Presumably requires `tags` to be set (it is, at the top level).
  enforce_tags: true
  # Presumably prints the resolved config at start-up.
  print_config: true