crlandsc committed on
Commit
bcb1f8a
1 Parent(s): 597e28d

uploaded vocals model

Browse files
Files changed (2) hide show
  1. hparams.yaml +158 -0
  2. vocals.ckpt +3 -0
hparams.yaml ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model:
2
+ sr: 44100
3
+ n_fft: 2048
4
+ bandsplits:
5
+ - - 1000
6
+ - 100
7
+ - - 4000
8
+ - 250
9
+ - - 8000
10
+ - 500
11
+ - - 16000
12
+ - 1000
13
+ - - 20000
14
+ - 2000
15
+ bottleneck_layer: rnn
16
+ t_timesteps: 263
17
+ fc_dim: 128
18
+ rnn_dim: 256
19
+ rnn_type: LSTM
20
+ bidirectional: true
21
+ num_layers: 12
22
+ mlp_dim: 512
23
+ return_mask: false
24
+ complex_as_channel: true
25
+ is_mono: false
26
+ train_dataset:
27
+ file_dir: /home/crlandsc/Music-Demixing-with-Band-Split-RNN/datasets/Vocals
28
+ txt_dir: files/
29
+ txt_path: null
30
+ target: vocals
31
+ is_training: true
32
+ is_mono: false
33
+ sr: 44100
34
+ preload_dataset: false
35
+ silent_prob: 0.1
36
+ mix_prob: 0.25
37
+ mix_tgt_too: false
38
+ test_dataset:
39
+ in_fp: /home/crlandsc/Music-Demixing-with-Band-Split-RNN/datasets/Vocals
40
+ target: vocals
41
+ is_mono: false
42
+ sr: 44100
43
+ win_size: 3
44
+ hop_size: 0.5
45
+ batch_size: 4
46
+ window: null
47
+ sad:
48
+ sr: 44100
49
+ window_size_in_sec: 6
50
+ overlap_ratio: 0.5
51
+ n_chunks_per_segment: 10
52
+ eps: 1.0e-05
53
+ gamma: 0.001
54
+ threshold_max_quantile: 0.15
55
+ threshold_segment: 0.5
56
+ augmentations:
57
+ randomcrop:
58
+ _target_: data.augmentations.RandomCrop
59
+ p: 1
60
+ chunk_size_sec: 3
61
+ sr: 44100
62
+ window_stft: 2048
63
+ hop_stft: 512
64
+ gainscale:
65
+ _target_: data.augmentations.GainScale
66
+ p: 0.5
67
+ min_db: -10.0
68
+ max_db: 10.0
69
+ featurizer:
70
+ direct_transform:
71
+ _target_: torchaudio.transforms.Spectrogram
72
+ n_fft: 2048
73
+ win_length: 2048
74
+ hop_length: 512
75
+ power: null
76
+ inverse_transform:
77
+ _target_: torchaudio.transforms.InverseSpectrogram
78
+ n_fft: 2048
79
+ win_length: 2048
80
+ hop_length: 512
81
+ callbacks:
82
+ lr_monitor:
83
+ _target_: pytorch_lightning.callbacks.LearningRateMonitor
84
+ logging_interval: epoch
85
+ model_ckpt:
86
+ _target_: pytorch_lightning.callbacks.ModelCheckpoint
87
+ monitor: train/loss
88
+ mode: min
89
+ save_top_k: 4
90
+ dirpath: /home/crlandsc/Music-Demixing-with-Band-Split-RNN/src/logs/bandsplitrnn/2023-04-28_17-51/weights
91
+ filename: epoch{epoch:02d}-train_loss{train/loss:.2f}
92
+ auto_insert_metric_name: false
93
+ model_ckpt_usdr:
94
+ _target_: pytorch_lightning.callbacks.ModelCheckpoint
95
+ monitor: train/usdr
96
+ mode: max
97
+ save_top_k: 4
98
+ dirpath: /home/crlandsc/Music-Demixing-with-Band-Split-RNN/src/logs/bandsplitrnn/2023-04-28_17-51/weights
99
+ filename: epoch{epoch:02d}-train_usdr{train/usdr:.2f}
100
+ auto_insert_metric_name: false
101
+ ema:
102
+ _target_: utils.callbacks.EMA
103
+ decay: 0.9999
104
+ validate_original_weights: false
105
+ every_n_steps: 1
106
+ logger:
107
+ tensorboard:
108
+ _target_: pytorch_lightning.loggers.TensorBoardLogger
109
+ save_dir: /home/crlandsc/Music-Demixing-with-Band-Split-RNN/src/logs/bandsplitrnn/2023-04-28_17-51/tb_logs
110
+ name: ''
111
+ version: ''
112
+ log_graph: false
113
+ default_hp_metric: false
114
+ prefix: ''
115
+ wandb:
116
+ _target_: pytorch_lightning.loggers.WandbLogger
117
+ project: MDX_BSRNN_23
118
+ name: vocals
119
+ save_dir: wandb_logs
120
+ offline: false
121
+ id: null
122
+ log_model: false
123
+ prefix: ''
124
+ job_type: train
125
+ group: ''
126
+ tags: []
127
+ train_loader:
128
+ batch_size: 8
129
+ num_workers: 12
130
+ shuffle: true
131
+ drop_last: true
132
+ val_loader:
133
+ batch_size: 2
134
+ num_workers: 8
135
+ shuffle: false
136
+ drop_last: false
137
+ opt:
138
+ _target_: torch.optim.Adam
139
+ lr: 0.001
140
+ sch:
141
+ warmup_step: 10
142
+ alpha: 0.1
143
+ gamma: 0.9899494936611665
144
+ ckpt_path: logs/bandsplitrnn/2023-04-28_14-12/weights/epoch217-train_usdr7.07.ckpt
145
+ trainer:
146
+ fast_dev_run: false
147
+ min_epochs: 100
148
+ max_epochs: 500
149
+ log_every_n_steps: 10
150
+ accelerator: auto
151
+ devices: auto
152
+ gradient_clip_val: 5
153
+ precision: 32
154
+ enable_progress_bar: true
155
+ benchmark: true
156
+ deterministic: false
157
+ experiment_dirname: bandsplitrnn
158
+ wandb_api_key: d5c4447e39b2b10b95f05f907d57845ded16bc13
vocals.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3de5ecab576823eb8446d79599d871449b1559073a1e610912e0a6f56a8daf8
3
+ size 519814907