youngsheen committed on
Commit
5df9004
1 Parent(s): 44aa067
vq_audio_log/simvq_65k/1second/config.yaml ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.2.5
2
+ seed_everything: 0
3
+ trainer:
4
+ accelerator: gpu
5
+ strategy: ddp_find_unused_parameters_true
6
+ devices: 2
7
+ num_nodes: 1
8
+ precision: 16-mixed
9
+ logger:
10
+ class_path: lightning.pytorch.loggers.TensorBoardLogger
11
+ init_args:
12
+ save_dir: vq_audio_log/simvq_65k
13
+ name: null
14
+ version: 1second
15
+ log_graph: false
16
+ default_hp_metric: true
17
+ prefix: ''
18
+ sub_dir: null
19
+ comment: ''
20
+ purge_step: null
21
+ max_queue: 10
22
+ flush_secs: 120
23
+ filename_suffix: ''
24
+ callbacks:
25
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint
26
+ init_args:
27
+ dirpath: vq_audio_log/simvq_65k
28
+ filename: null
29
+ monitor: null
30
+ verbose: false
31
+ save_last: null
32
+ save_top_k: -1
33
+ save_weights_only: false
34
+ mode: min
35
+ auto_insert_metric_name: true
36
+ every_n_train_steps: null
37
+ train_time_interval: null
38
+ every_n_epochs: null
39
+ save_on_train_epoch_end: null
40
+ enable_version_counter: true
41
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
42
+ init_args:
43
+ logging_interval: step
44
+ log_momentum: false
45
+ log_weight_decay: false
46
+ fast_dev_run: false
47
+ max_epochs: 50
48
+ min_epochs: null
49
+ max_steps: -1
50
+ min_steps: null
51
+ max_time: null
52
+ limit_train_batches: null
53
+ limit_val_batches: null
54
+ limit_test_batches: null
55
+ limit_predict_batches: null
56
+ overfit_batches: 0.0
57
+ val_check_interval: null
58
+ check_val_every_n_epoch: 1
59
+ num_sanity_val_steps: 0
60
+ log_every_n_steps: 100
61
+ enable_checkpointing: null
62
+ enable_progress_bar: null
63
+ enable_model_summary: null
64
+ accumulate_grad_batches: 1
65
+ gradient_clip_val: null
66
+ gradient_clip_algorithm: null
67
+ deterministic: null
68
+ benchmark: null
69
+ inference_mode: true
70
+ use_distributed_sampler: true
71
+ profiler: null
72
+ detect_anomaly: false
73
+ barebones: false
74
+ plugins: null
75
+ sync_batchnorm: false
76
+ reload_dataloaders_every_n_epochs: 0
77
+ default_root_dir: null
78
+ ckpt_path: null
79
+ model:
80
+ class_path: taming.models.vq_audio.VQModel
81
+ init_args:
82
+ ddconfig:
83
+ causal: true
84
+ dimension: 512
85
+ lossconfig:
86
+ target: taming.modules.losses.stft.VQSTFTWithDiscriminator
87
+ params:
88
+ disc_conditional: false
89
+ disc_in_channels: 1
90
+ disc_start: 0
91
+ codebook_enlarge_ratio: 0
92
+ codebook_enlarge_steps: 2000
93
+ sample_rate: 24000
94
+ commit_weight: 1000.0
95
+ gen_loss_weight: 1.0
96
+ mel_loss_coeff: 45.0
97
+ mrd_loss_coeff: 1.0
98
+ quantconfig:
99
+ target: taming.modules.vqvae.quantize.SimVQ1D
100
+ params:
101
+ n_e: 65536
102
+ e_dim: 512
103
+ beta: 0.25
104
+ legacy: false
105
+ sample_rate: 24000
106
+ target_bandwidths: null
107
+ audio_normalize: false
108
+ segment: None
109
+ ckpt_path: null
110
+ ignore_keys: []
111
+ colorize_nlabels: null
112
+ monitor: null
113
+ learning_rate: 0.0001
114
+ warmup_epochs: 1.0
115
+ scheduler_type: None
116
+ min_learning_rate: 0
117
+ use_ema: true
118
+ stage: null
119
+ data:
120
+ class_path: main.PadDataModuleFromConfig
121
+ init_args:
122
+ batch_size: 64
123
+ train:
124
+ target: taming.data.libritts.LibriTTSTrain
125
+ params:
126
+ config:
127
+ sample_rate: 24000
128
+ channels: 1
129
+ clip_seconds: 1
130
+ validation:
131
+ target: taming.data.libritts.LibriTTSDev
132
+ params:
133
+ config:
134
+ sample_rate: 24000
135
+ channels: 1
136
+ clip_seconds: 1
137
+ test:
138
+ target: taming.data.libritts.LibriTTSTest
139
+ params:
140
+ config:
141
+ sample_rate: 24000
142
+ channels: 1
143
+ clip_seconds: -1
144
+ wrap: false
145
+ num_workers: 8
vq_audio_log/simvq_65k/epoch=49-step=138600.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:431b93fefa74b10424bc8ed13de579c109d8c95b44ca5156aea681fe9b63938a
3
+ size 1454841294
vq_audio_log/simvq_8k/1second/config.yaml ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # lightning.pytorch==2.2.5
2
+ seed_everything: 0
3
+ trainer:
4
+ accelerator: gpu
5
+ strategy: ddp_find_unused_parameters_true
6
+ devices: 2
7
+ num_nodes: 1
8
+ precision: 16-mixed
9
+ logger:
10
+ class_path: lightning.pytorch.loggers.TensorBoardLogger
11
+ init_args:
12
+ save_dir: vq_audio_log/simvq_8k
13
+ name: null
14
+ version: 1second
15
+ log_graph: false
16
+ default_hp_metric: true
17
+ prefix: ''
18
+ sub_dir: null
19
+ comment: ''
20
+ purge_step: null
21
+ max_queue: 10
22
+ flush_secs: 120
23
+ filename_suffix: ''
24
+ callbacks:
25
+ - class_path: lightning.pytorch.callbacks.ModelCheckpoint
26
+ init_args:
27
+ dirpath: vq_audio_log/simvq_8k
28
+ filename: null
29
+ monitor: null
30
+ verbose: false
31
+ save_last: null
32
+ save_top_k: -1
33
+ save_weights_only: false
34
+ mode: min
35
+ auto_insert_metric_name: true
36
+ every_n_train_steps: null
37
+ train_time_interval: null
38
+ every_n_epochs: null
39
+ save_on_train_epoch_end: null
40
+ enable_version_counter: true
41
+ - class_path: lightning.pytorch.callbacks.LearningRateMonitor
42
+ init_args:
43
+ logging_interval: step
44
+ log_momentum: false
45
+ log_weight_decay: false
46
+ fast_dev_run: false
47
+ max_epochs: 50
48
+ min_epochs: null
49
+ max_steps: -1
50
+ min_steps: null
51
+ max_time: null
52
+ limit_train_batches: null
53
+ limit_val_batches: null
54
+ limit_test_batches: null
55
+ limit_predict_batches: null
56
+ overfit_batches: 0.0
57
+ val_check_interval: null
58
+ check_val_every_n_epoch: 1
59
+ num_sanity_val_steps: 0
60
+ log_every_n_steps: 100
61
+ enable_checkpointing: null
62
+ enable_progress_bar: null
63
+ enable_model_summary: null
64
+ accumulate_grad_batches: 1
65
+ gradient_clip_val: null
66
+ gradient_clip_algorithm: null
67
+ deterministic: null
68
+ benchmark: null
69
+ inference_mode: true
70
+ use_distributed_sampler: true
71
+ profiler: null
72
+ detect_anomaly: false
73
+ barebones: false
74
+ plugins: null
75
+ sync_batchnorm: false
76
+ reload_dataloaders_every_n_epochs: 0
77
+ default_root_dir: null
78
+ ckpt_path: null
79
+ model:
80
+ class_path: taming.models.vq_audio.VQModel
81
+ init_args:
82
+ ddconfig:
83
+ causal: true
84
+ dimension: 512
85
+ lossconfig:
86
+ target: taming.modules.losses.stft.VQSTFTWithDiscriminator
87
+ params:
88
+ disc_conditional: false
89
+ disc_in_channels: 1
90
+ disc_start: 0
91
+ codebook_enlarge_ratio: 0
92
+ codebook_enlarge_steps: 2000
93
+ sample_rate: 24000
94
+ commit_weight: 1000.0
95
+ gen_loss_weight: 1.0
96
+ mel_loss_coeff: 45.0
97
+ mrd_loss_coeff: 1.0
98
+ quantconfig:
99
+ target: taming.modules.vqvae.quantize.SimVQ1D
100
+ params:
101
+ n_e: 8192
102
+ e_dim: 512
103
+ beta: 0.25
104
+ legacy: false
105
+ sample_rate: 24000
106
+ target_bandwidths: null
107
+ audio_normalize: false
108
+ segment: None
109
+ ckpt_path: null
110
+ ignore_keys: []
111
+ colorize_nlabels: null
112
+ monitor: null
113
+ learning_rate: 0.0001
114
+ warmup_epochs: 1.0
115
+ scheduler_type: None
116
+ min_learning_rate: 0
117
+ use_ema: true
118
+ stage: null
119
+ data:
120
+ class_path: main.PadDataModuleFromConfig
121
+ init_args:
122
+ batch_size: 64
123
+ train:
124
+ target: taming.data.libritts.LibriTTSTrain
125
+ params:
126
+ config:
127
+ sample_rate: 24000
128
+ channels: 1
129
+ clip_seconds: 1
130
+ validation:
131
+ target: taming.data.libritts.LibriTTSDev
132
+ params:
133
+ config:
134
+ sample_rate: 24000
135
+ channels: 1
136
+ clip_seconds: 1
137
+ test:
138
+ target: taming.data.libritts.LibriTTSTest
139
+ params:
140
+ config:
141
+ sample_rate: 24000
142
+ channels: 1
143
+ clip_seconds: -1
144
+ wrap: false
145
+ num_workers: 8
vq_audio_log/simvq_8k/epoch=49-step=138600.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7b2be972941d67fbc41708f44359798b8a95e9ad38719f448d580744414e898
3
+ size 1337400782