File size: 3,952 Bytes
c3d98d9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Audio chunking and spectrogram-related settings.
# NOTE(review): per-key meanings are inferred from the key names; confirm against the consuming code.
audio:
  # With the values in this section this equals hop_length * (dim_t - 1) = 1024 * 255.
  chunk_size: 261120
  # With the value below this equals n_fft / 2.
  dim_f: 4096
  # Same value as inference.dim_t.
  dim_t: 256
  hop_length: 1024
  n_fft: 8192
  # Distinct from model.num_channels, which lives under a different mapping.
  num_channels: 2
  # presumably Hz — TODO confirm
  sample_rate: 44100
  min_mean_abs: 0.001

# Network architecture hyper-parameters.
# NOTE(review): the model class that consumes these keys is not visible in this file; confirm key semantics there.
model:
  act: gelu
  bottleneck_factor: 4
  growth: 128
  norm: InstanceNorm
  num_blocks_per_scale: 2
  # Distinct from audio.num_channels (2); same key name under a different mapping.
  num_channels: 128
  num_scales: 5
  num_subbands: 4
  # Two-element sequence, both entries 2; written flush with its parent key, which is valid YAML.
  scale:
  - 2
  - 2

# Training-loop settings.
training:
  batch_size: 2
  gradient_accumulation_steps: 1
  # NOTE(review): 0 presumably disables gradient clipping — confirm.
  grad_clip: 0
  # Stem names; the `augmentations` section has a per-stem mapping for each of them.
  # NOTE(review): order may matter to the consumer — do not reorder without checking.
  instruments:
  - dry
  - other
  # Written with a decimal point and a signed exponent so that YAML 1.1 parsers
  # such as PyYAML resolve it as a float rather than a string.
  lr: 1.0e-06
  # NOTE(review): patience / reduce_factor look like learning-rate scheduler settings — confirm.
  patience: 4
  reduce_factor: 0.93
  target_instrument: null
  num_epochs: 40
  num_steps: 1000
  q: 0.95
  coarse_loss_clip: true
  ema_momentum: 0.999
  optimizer: adamw
  # Number of processes used while reading dataset metadata; more processes can speed up metadata generation.
  read_metadata_procs: 8
  # Needed when checking on a multisong dataset whether "other" is actually instrumental.
  other_fix: false
  # Enable or disable mixed precision (float16); usually this should be true.
  use_amp: true

# Training-time data augmentation settings.
# For each effect, the bare key (e.g. `pedalboard_chorus`) holds its chance of
# being applied and the `*_min` / `*_max` keys bound its randomised parameters.
# NOTE(review): that reading is inferred from the key names and the existing
# comments — confirm against the consuming code.
augmentations:
  # Master switch: enable or disable all augmentations (a quick way to turn everything off).
  enable: false
  # Randomly change the loudness of each stem within the range (loudness_min; loudness_max).
  loudness: true
  loudness_min: 0.5
  loudness_max: 1.5
  # Mix several stems of the same type with some probability (only works for dataset types 1, 2, 3).
  mixup: true
  # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02).
  # NOTE(review): `!!python/tuple` is a PyYAML-specific tag that yaml.safe_load rejects;
  # confirm which loader reads this file before replacing it with a plain sequence.
  mixup_probs: !!python/tuple
    - 0.2
    - 0.02
  mixup_loudness_min: 0.5
  mixup_loudness_max: 1.5

  # NOTE(review): presumably applied to every stem, in addition to the per-stem
  # sections (`dry`, `other`) below — confirm.
  all:
    channel_shuffle: 0.5  # set to 0 or lower to disable
    # Inverse the track; a low probability is recommended.
    # NOTE(review): presumably time reversal, since polarity is handled separately below — confirm.
    random_inverse: 0.05
    random_polarity: 0.5  # polarity change (multiply the waveform by -1)

    # pedalboard chorus block
    pedalboard_chorus: 0.001
    pedalboard_chorus_rate_hz_min: 1.0
    pedalboard_chorus_rate_hz_max: 7.0
    pedalboard_chorus_depth_min: 0.25
    pedalboard_chorus_depth_max: 0.95
    pedalboard_chorus_centre_delay_ms_min: 3
    pedalboard_chorus_centre_delay_ms_max: 10
    pedalboard_chorus_feedback_min: 0.0
    pedalboard_chorus_feedback_max: 0.01
    pedalboard_chorus_mix_min: 0.1
    pedalboard_chorus_mix_max: 0.9

    # pedalboard phaser block (the keys keep the `phazer` spelling)
    pedalboard_phazer: 0.001
    pedalboard_phazer_rate_hz_min: 1.0
    pedalboard_phazer_rate_hz_max: 10.0
    pedalboard_phazer_depth_min: 0.25
    pedalboard_phazer_depth_max: 0.95
    pedalboard_phazer_centre_frequency_hz_min: 200
    pedalboard_phazer_centre_frequency_hz_max: 12000
    pedalboard_phazer_feedback_min: 0.0
    pedalboard_phazer_feedback_max: 0.5
    pedalboard_phazer_mix_min: 0.1
    pedalboard_phazer_mix_max: 0.9

    # pedalboard pitch shift block
    pedalboard_pitch_shift: 0.01
    pedalboard_pitch_shift_semitones_min: -7
    pedalboard_pitch_shift_semitones_max: 7

    # pedalboard resample block
    pedalboard_resample: 0.001
    pedalboard_resample_target_sample_rate_min: 4000
    pedalboard_resample_target_sample_rate_max: 44100

    # mp3 compression block
    # NOTE(review): unlike every other effect in this section, there is no bare
    # `mp3_compression` key next to these bounds — confirm whether the effect is
    # intentionally left without one.
    mp3_compression_min_bitrate: 32
    mp3_compression_max_bitrate: 320
    mp3_compression_backend: "lameenc"

  # Settings under the `dry` stem name (see training.instruments).
  dry:
    # pedalboard distortion block
    pedalboard_distortion: 0.001
    pedalboard_distortion_drive_db_min: 1.0
    pedalboard_distortion_drive_db_max: 25.0

    # tanh distortion block
    tanh_distortion: 0.05
    tanh_distortion_min: 0.1
    tanh_distortion_max: 0.7

    # pedalboard bitcrash block
    pedalboard_bitcrash: 0.005
    pedalboard_bitcrash_bit_depth_min: 4
    pedalboard_bitcrash_bit_depth_max: 16

    # seven-band parametric EQ block (same values as under `other`)
    seven_band_parametric_eq: 0.24
    seven_band_parametric_eq_min_gain_db: -9
    seven_band_parametric_eq_max_gain_db: 9

    # gaussian noise block
    gaussian_noise: 0.005
    gaussian_noise_min_amplitude: 0.001
    gaussian_noise_max_amplitude: 0.01

    # time stretch block
    time_stretch: 0.01
    time_stretch_min_rate: 0.8
    time_stretch_max_rate: 1.25

  # Settings under the `other` stem name (see training.instruments).
  other:
    # seven-band parametric EQ block (same values as under `dry`)
    seven_band_parametric_eq: 0.24
    seven_band_parametric_eq_min_gain_db: -9
    seven_band_parametric_eq_max_gain_db: 9

# Inference-time settings.
# NOTE(review): key meanings are inferred from their names; confirm against the consuming code.
inference:
  # Same value as training.batch_size.
  batch_size: 2
  # Same value as audio.dim_t.
  dim_t: 256
  num_overlap: 4