File size: 11,118 Bytes
3eed963
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
CONFIG
├── data
│   └── _target_: matcha.data.text_mel_datamodule.TextMelDataModule
│       name: ljspeech
│       train_filelist_path: data/LJSpeech-1.1/train.txt
│       valid_filelist_path: data/LJSpeech-1.1/val.txt
│       batch_size: 32
│       num_workers: 20
│       pin_memory: true
│       cleaners:
│       - english_cleaners2
│       add_blank: true
│       n_spks: 1
│       n_fft: 1024
│       n_feats: 80
│       sample_rate: 22050
│       hop_length: 256
│       win_length: 1024
│       f_min: 0
│       f_max: 8000
│       data_statistics:
│         mel_mean: -5.536622
│         mel_std: 2.116101
│       seed: 1234
│       load_durations: false
│
├── model
│   └── _target_: matcha.models.matcha_tts.MatchaTTS
│       n_vocab: 178
│       n_spks: 1
│       spk_emb_dim: 64
│       n_feats: 80
│       data_statistics:
│         mel_mean: -5.536622
│         mel_std: 2.116101
│       out_size: null
│       prior_loss: true
│       use_precomputed_durations: false
│       encoder:
│         encoder_type: RoPE Encoder
│         encoder_params:
│           n_feats: 80
│           n_channels: 192
│           filter_channels: 768
│           filter_channels_dp: 256
│           n_heads: 2
│           n_layers: 6
│           kernel_size: 3
│           p_dropout: 0.1
│           spk_emb_dim: 64
│           n_spks: 1
│           prenet: true
│         duration_predictor_params:
│           filter_channels_dp: 256
│           kernel_size: 3
│           p_dropout: 0.1
│       decoder:
│         channels:
│         - 256
│         - 256
│         dropout: 0.05
│         attention_head_dim: 64
│         n_blocks: 1
│         num_mid_blocks: 2
│         num_heads: 2
│         act_fn: snakebeta
│       cfm:
│         name: CFM
│         solver: euler
│         sigma_min: 0.0001
│       optimizer:
│         _target_: torch.optim.Adam
│         _partial_: true
│         lr: 0.0001
│         weight_decay: 0.0
│
├── callbacks
│   └── model_checkpoint:
│         _target_: lightning.pytorch.callbacks.ModelCheckpoint
│         dirpath: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10-19_19-
│         filename: checkpoint_{epoch:03d}
│         monitor: epoch
│         verbose: false
│         save_last: true
│         save_top_k: 10
│         mode: max
│         auto_insert_metric_name: true
│         save_weights_only: false
│         every_n_train_steps: null
│         train_time_interval: null
│         every_n_epochs: 100
│         save_on_train_epoch_end: null
│       model_summary:
│         _target_: lightning.pytorch.callbacks.RichModelSummary
│         max_depth: 3
│       rich_progress_bar:
│         _target_: lightning.pytorch.callbacks.RichProgressBar
│
├── logger
│   └── tensorboard:
│         _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger
│         save_dir: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10-19_19
│         name: null
│         log_graph: false
│         default_hp_metric: true
│         prefix: ''
│
├── trainer
│   └── _target_: lightning.pytorch.trainer.Trainer
│       default_root_dir: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10
│       max_epochs: -1
│       accelerator: gpu
│       devices:
│       - 0
│       precision: 16-mixed
│       check_val_every_n_epoch: 1
│       deterministic: false
│       gradient_clip_val: 5.0
│
├── paths
│   └── root_dir: /workspace/Matcha-TTS
│       data_dir: /workspace/Matcha-TTS/data/
│       log_dir: /workspace/Matcha-TTS/logs/
│       output_dir: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10-19_19
│       work_dir: /workspace/Matcha-TTS
│
├── extras
│   └── ignore_warnings: false
│       enforce_tags: true
│       print_config: true
│
├── task_name
│   └── train
├── run_name
│   └── ljspeech
├── tags
│   └── ['ljspeech']
├── train
│   └── True
├── test
│   └── True
├── ckpt_path
│   └── None
└── seed
    └── 1234