Sucial committed on
Commit
b417b0e
1 Parent(s): d348568

Upload 2 files

Browse files
dereverb-echo_128_4_4_mel_band_roformer.yaml ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio:
2
+ chunk_size: 352800
3
+ dim_f: 1024
4
+ dim_t: 801 # doesn't work (use in model)
5
+ hop_length: 441 # doesn't work (use in model)
6
+ n_fft: 2048
7
+ num_channels: 2
8
+ sample_rate: 44100
9
+ min_mean_abs: 0.000
10
+
11
+ model:
12
+ dim: 128
13
+ depth: 4
14
+ stereo: true
15
+ num_stems: 1
16
+ time_transformer_depth: 1
17
+ freq_transformer_depth: 1
18
+ linear_transformer_depth: 0
19
+ num_bands: 60
20
+ dim_head: 64
21
+ heads: 8
22
+ attn_dropout: 0.1
23
+ ff_dropout: 0.1
24
+ flash_attn: True
25
+ dim_freqs_in: 1025
26
+ sample_rate: 44100 # needed for mel filter bank from librosa
27
+ stft_n_fft: 2048
28
+ stft_hop_length: 441
29
+ stft_win_length: 2048
30
+ stft_normalized: False
31
+ mask_estimator_depth: 2
32
+ multi_stft_resolution_loss_weight: 1.0
33
+ multi_stft_resolutions_window_sizes: !!python/tuple
34
+ - 4096
35
+ - 2048
36
+ - 1024
37
+ - 512
38
+ - 256
39
+ multi_stft_hop_size: 147
40
+ multi_stft_normalized: False
41
+
42
+
43
+ training:
44
+ batch_size: 3
45
+ gradient_accumulation_steps: 8
46
+ grad_clip: 0
47
+ instruments:
48
+ - dry
49
+ - other
50
+ lr: 1.0e-04
51
+ patience: 2
52
+ reduce_factor: 0.95
53
+ target_instrument: dry
54
+ num_epochs: 1000
55
+ num_steps: 1000
56
+ q: 0.95
57
+ coarse_loss_clip: true
58
+ ema_momentum: 0.999
59
+ optimizer: adam
60
+ other_fix: false # needed when checking on the multisong dataset whether "other" is actually instrumental
61
+ use_amp: true # enable or disable usage of mixed precision (float16) - usually it must be true
62
+
63
+ augmentations:
64
+ enable: true # enable or disable all augmentations (to quickly disable them if needed)
65
+ loudness: true # randomly change the loudness of each stem in the range (loudness_min; loudness_max)
66
+ loudness_min: 0.5
67
+ loudness_max: 1.5
68
+ mixup: false # mix several stems of the same type with some probability (only works for dataset types: 1, 2, 3)
69
+ mixup_probs: !!python/tuple # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02)
70
+ - 0.2
71
+ - 0.02
72
+ mixup_loudness_min: 0.5
73
+ mixup_loudness_max: 1.5
74
+
75
+ inference:
76
+ batch_size: 2
77
+ dim_t: 801
78
+ num_overlap: 4
dereverb-echo_128_4_4_mel_band_roformer_sdr_dry_12.4235.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aea1752884622db55f6aa28808971088bcd1b833a3c0ec25b3376c6291b97b6d
3
+ size 128415988