# File size: 3,952 Bytes
# c3d98d9
# Audio input and framing parameters.
# NOTE(review): names such as n_fft / hop_length suggest STFT settings - confirm
# against the code that consumes this file.
audio:
  # Equals hop_length * (dim_t - 1) = 1024 * 255.
  chunk_size: 261120
  # Equals n_fft / 2.
  dim_f: 4096
  # Same value as inference.dim_t.
  dim_t: 256
  hop_length: 1024
  n_fft: 8192
  num_channels: 2
  sample_rate: 44100
  min_mean_abs: 0.001
# Network hyper-parameters. The meaning of each key is defined by the model code
# that reads this section; only the values are recorded here.
model:
  act: gelu
  bottleneck_factor: 4
  growth: 128
  norm: InstanceNorm
  num_blocks_per_scale: 2
  # Distinct from audio.num_channels (2); this one lives under `model`.
  num_channels: 128
  num_scales: 5
  num_subbands: 4
  # Two-element list.
  scale:
    - 2
    - 2
# Training-loop settings.
training:
  batch_size: 2
  gradient_accumulation_steps: 1
  # NOTE(review): 0 presumably disables gradient clipping - confirm in the trainer.
  grad_clip: 0
  # Stem names; the per-stem sections under `augmentations` use the same names.
  instruments:
    - dry
    - other
  lr: 1.0e-06
  # NOTE(review): patience / reduce_factor look like LR-scheduler settings - confirm.
  patience: 4
  reduce_factor: 0.93
  target_instrument: null
  num_epochs: 40
  num_steps: 1000
  q: 0.95
  coarse_loss_clip: true
  ema_momentum: 0.999
  optimizer: adamw
  # Number of processes to use while reading dataset metadata.
  # Can speed up metadata generation.
  read_metadata_procs: 8
  # Needed when checking on the multisong dataset whether `other` is actually
  # instrumental.
  other_fix: false
  # Enable or disable mixed precision (float16); usually this must be true.
  use_amp: true
# Data-augmentation settings.
# Sub-sections: `all`, plus one section per stem name listed in
# training.instruments (`dry`, `other`).
# NOTE(review): `all` presumably applies to every stem and the named sections
# only to that stem - confirm against the dataset code.
augmentations:
  # Enable or disable all augmentations (quick global switch).
  enable: false
  # Randomly change the loudness of each stem within (loudness_min; loudness_max).
  loudness: true
  loudness_min: 0.5
  loudness_max: 1.5
  # Mix several stems of the same type with some probability
  # (only works for dataset types: 1, 2, 3).
  mixup: true
  # 2 additional stems of the same type (1st with prob 0.2, 2nd with prob 0.02).
  # NOTE(review): `!!python/tuple` is a Python-specific tag that safe loaders
  # reject; kept so the loaded value stays a tuple. Switch to a plain list only
  # if the consumer accepts one.
  mixup_probs: !!python/tuple
    - 0.2
    - 0.02
  mixup_loudness_min: 0.5
  mixup_loudness_max: 1.5

  all:
    # Set 0 or lower to disable.
    channel_shuffle: 0.5
    # Inverse track (a lower probability is better).
    random_inverse: 0.05
    # Polarity change (multiply waveform by -1).
    random_polarity: 0.5

    # pedalboard chorus block
    pedalboard_chorus: 0.001
    pedalboard_chorus_rate_hz_min: 1.0
    pedalboard_chorus_rate_hz_max: 7.0
    pedalboard_chorus_depth_min: 0.25
    pedalboard_chorus_depth_max: 0.95
    pedalboard_chorus_centre_delay_ms_min: 3
    pedalboard_chorus_centre_delay_ms_max: 10
    pedalboard_chorus_feedback_min: 0.0
    pedalboard_chorus_feedback_max: 0.01
    pedalboard_chorus_mix_min: 0.1
    pedalboard_chorus_mix_max: 0.9

    # pedalboard phaser block (keys keep the `phazer` spelling the consumer reads)
    pedalboard_phazer: 0.001
    pedalboard_phazer_rate_hz_min: 1.0
    pedalboard_phazer_rate_hz_max: 10.0
    pedalboard_phazer_depth_min: 0.25
    pedalboard_phazer_depth_max: 0.95
    pedalboard_phazer_centre_frequency_hz_min: 200
    pedalboard_phazer_centre_frequency_hz_max: 12000
    pedalboard_phazer_feedback_min: 0.0
    pedalboard_phazer_feedback_max: 0.5
    pedalboard_phazer_mix_min: 0.1
    pedalboard_phazer_mix_max: 0.9

    # pedalboard pitch shift block
    pedalboard_pitch_shift: 0.01
    pedalboard_pitch_shift_semitones_min: -7
    pedalboard_pitch_shift_semitones_max: 7

    # pedalboard resample block
    pedalboard_resample: 0.001
    pedalboard_resample_target_sample_rate_min: 4000
    pedalboard_resample_target_sample_rate_max: 44100

    # NOTE(review): only the mp3 compression range/backend are set here; no
    # `mp3_compression` enabling key is present in this section - confirm whether
    # that is intentional.
    mp3_compression_min_bitrate: 32
    mp3_compression_max_bitrate: 320
    mp3_compression_backend: "lameenc"

  dry:
    # pedalboard distortion block
    pedalboard_distortion: 0.001
    pedalboard_distortion_drive_db_min: 1.0
    pedalboard_distortion_drive_db_max: 25.0

    tanh_distortion: 0.05
    tanh_distortion_min: 0.1
    tanh_distortion_max: 0.7

    # pedalboard bitcrush block (keys keep the `bitcrash` spelling the consumer reads)
    pedalboard_bitcrash: 0.005
    pedalboard_bitcrash_bit_depth_min: 4
    pedalboard_bitcrash_bit_depth_max: 16

    seven_band_parametric_eq: 0.24
    seven_band_parametric_eq_min_gain_db: -9
    seven_band_parametric_eq_max_gain_db: 9

    gaussian_noise: 0.005
    gaussian_noise_min_amplitude: 0.001
    gaussian_noise_max_amplitude: 0.01

    time_stretch: 0.01
    time_stretch_min_rate: 0.8
    time_stretch_max_rate: 1.25

  other:
    # Same EQ settings as the `dry` section.
    seven_band_parametric_eq: 0.24
    seven_band_parametric_eq_min_gain_db: -9
    seven_band_parametric_eq_max_gain_db: 9
# Inference-time settings.
inference:
  batch_size: 2
  # Same value as audio.dim_t.
  dim_t: 256
  # Plain integer; a stray trailing "|" would make the value the string "4 |".
  num_overlap: 4