# File-viewer metadata (not configuration): file size 1,261 bytes; revision marker 4baf506.
# Training configuration.
#
# NOTE(review): the copy this was restored from had lost all indentation.
# Flattened, every section header below parses as null and `sr` is a
# duplicate top-level key. The nesting here is reconstructed from the key
# grouping -- confirm it against the code that loads this file.

# Output directory and logging/saving cadence.
# NOTE(review): the units of the three intervals are not stated here.
log_dir: "Models/run_timbre_norm_ctc_titanet/"
save_freq: 1
log_interval: 10
save_interval: 1000

device: "cuda"
epochs: 1000  # number of epochs for first stage training (pre-training)
batch_size: 4
batch_length: 100  # maximum duration of audio in a batch (in seconds)
max_len: 80  # maximum number of frames

# Explicit empty string (not null).
# NOTE(review): presumably means "no checkpoint to load" -- confirm.
pretrained_model: ""
# Set to true if you do not want to load epoch numbers and optimizer
# parameters.
load_only_params: false

F0_path: "modules/JDC/bst.t7"

data_params:
  train_data: "./data/train.txt"
  val_data: "./data/val.txt"
  root_path: "./data/"

preprocess_params:
  sr: 24000
  spect_params:
    n_fft: 2048
    win_length: 1200
    hop_length: 300

model_params:
  fixed: true
  causal: true
  lstm: 2
  norm_f0: true
  use_gr_content_f0: false
  use_gr_prosody_phone: false
  use_gr_timbre_prosody: false
  separate_prosody_encoder: true
  n_c_codebooks: 2
  timbre_norm: true
  use_gr_content_global_f0: true
  # NOTE(review): placed under model_params because it sits between two
  # model keys in the flattened copy -- confirm where the loader reads it.
  w2v: "w2v-ctc"

  # NOTE(review): assumed to be nested inside model_params -- confirm.
  DAC:
    encoder_dim: 64
    # decoder_rates is encoder_rates reversed; the product of either list
    # (2 * 5 * 5 * 6 = 300) equals spect_params.hop_length above.
    encoder_rates: [2, 5, 5, 6]
    decoder_dim: 1536
    decoder_rates: [6, 5, 5, 2]
    # Same value as preprocess_params.sr; keep the two in sync.
    sr: 24000

loss_params:
  base_lr: 0.0001
  discriminator_iter_start: 2000
  lambda_spk: 1.0
  lambda_mel: 45
  lambda_f0: 1.0
  lambda_uv: 1.0