File size: 11,118 Bytes

3eed963

CONFIG
├── data
│   └── _target_: matcha.data.text_mel_datamodule.TextMelDataModule             
│       name: ljspeech                                                          
│       train_filelist_path: data/LJSpeech-1.1/train.txt                        
│       valid_filelist_path: data/LJSpeech-1.1/val.txt                          
│       batch_size: 32                                                          
│       num_workers: 20                                                         
│       pin_memory: true                                                        
│       cleaners:                                                               
│       - english_cleaners2                                                     
│       add_blank: true                                                         
│       n_spks: 1                                                               
│       n_fft: 1024                                                             
│       n_feats: 80                                                             
│       sample_rate: 22050                                                      
│       hop_length: 256                                                         
│       win_length: 1024                                                        
│       f_min: 0                                                                
│       f_max: 8000                                                             
│       data_statistics:                                                        
│         mel_mean: -5.536622                                                   
│         mel_std: 2.116101                                                     
│       seed: 1234                                                              
│       load_durations: false                                                   
│                                                                               
├── model
│   └── _target_: matcha.models.matcha_tts.MatchaTTS                            
│       n_vocab: 178                                                            
│       n_spks: 1                                                               
│       spk_emb_dim: 64                                                         
│       n_feats: 80                                                             
│       data_statistics:                                                        
│         mel_mean: -5.536622                                                   
│         mel_std: 2.116101                                                     
│       out_size: null                                                          
│       prior_loss: true                                                        
│       use_precomputed_durations: false                                        
│       encoder:                                                                
│         encoder_type: RoPE Encoder                                            
│         encoder_params:                                                       
│           n_feats: 80                                                         
│           n_channels: 192                                                     
│           filter_channels: 768                                                
│           filter_channels_dp: 256                                             
│           n_heads: 2                                                          
│           n_layers: 6                                                         
│           kernel_size: 3                                                      
│           p_dropout: 0.1                                                      
│           spk_emb_dim: 64                                                     
│           n_spks: 1                                                           
│           prenet: true                                                        
│         duration_predictor_params:                                            
│           filter_channels_dp: 256                                             
│           kernel_size: 3                                                      
│           p_dropout: 0.1                                                      
│       decoder:                                                                
│         channels:                                                             
│         - 256                                                                 
│         - 256                                                                 
│         dropout: 0.05                                                         
│         attention_head_dim: 64                                                
│         n_blocks: 1                                                           
│         num_mid_blocks: 2                                                     
│         num_heads: 2                                                          
│         act_fn: snakebeta                                                     
│       cfm:                                                                    
│         name: CFM                                                             
│         solver: euler                                                         
│         sigma_min: 0.0001                                                     
│       optimizer:                                                              
│         _target_: torch.optim.Adam                                            
│         _partial_: true                                                       
│         lr: 0.0001                                                            
│         weight_decay: 0.0                                                     
│                                                                               
├── callbacks
│   └── model_checkpoint:                                                       
│         _target_: lightning.pytorch.callbacks.ModelCheckpoint                 
│         dirpath: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10-19_19-
│         filename: checkpoint_{epoch:03d}                                      
│         monitor: epoch                                                        
│         verbose: false                                                        
│         save_last: true                                                       
│         save_top_k: 10                                                        
│         mode: max                                                             
│         auto_insert_metric_name: true                                         
│         save_weights_only: false                                              
│         every_n_train_steps: null                                             
│         train_time_interval: null                                             
│         every_n_epochs: 100                                                   
│         save_on_train_epoch_end: null                                         
│       model_summary:                                                          
│         _target_: lightning.pytorch.callbacks.RichModelSummary                
│         max_depth: 3                                                          
│       rich_progress_bar:                                                      
│         _target_: lightning.pytorch.callbacks.RichProgressBar                 
│                                                                               
├── logger
│   └── tensorboard:                                                            
│         _target_: lightning.pytorch.loggers.tensorboard.TensorBoardLogger     
│         save_dir: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10-19_19
│         name: null                                                            
│         log_graph: false                                                      
│         default_hp_metric: true                                               
│         prefix: ''                                                            
│                                                                               
├── trainer
│   └── _target_: lightning.pytorch.trainer.Trainer                             
│       default_root_dir: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10
│       max_epochs: -1                                                          
│       accelerator: gpu                                                        
│       devices:                                                                
│       - 0                                                                     
│       precision: 16-mixed                                                     
│       check_val_every_n_epoch: 1                                              
│       deterministic: false                                                    
│       gradient_clip_val: 5.0                                                  
│                                                                               
├── paths
│   └── root_dir: /workspace/Matcha-TTS                                         
│       data_dir: /workspace/Matcha-TTS/data/                                   
│       log_dir: /workspace/Matcha-TTS/logs/                                    
│       output_dir: /workspace/Matcha-TTS/logs/train/ljspeech/runs/2024-10-19_19
│       work_dir: /workspace/Matcha-TTS                                         
│                                                                               
├── extras
│   └── ignore_warnings: false                                                  
│       enforce_tags: true                                                      
│       print_config: true                                                      
│                                                                               
├── task_name
│   └── train                                                                   
├── run_name
│   └── ljspeech                                                                
├── tags
│   └── ['ljspeech']                                                            
├── train
│   └── True                                                                    
├── test
│   └── True                                                                    
├── ckpt_path
│   └── None                                                                    
└── seed
    └── 1234