{
  "base_config": "config/svc/base.json",
  "model": {
    "condition_encoder": {
      "merge_mode": "add",

      "use_f0": true,
      "use_uv": true,
      "use_energy": true,

      "input_melody_dim": 1,
      "n_bins_melody": 256,
      "output_melody_dim": 384,
      "input_loudness_dim": 1,
      "n_bins_loudness": 256,
      "output_loudness_dim": 384,

      "use_whisper": false,
      "use_contentvec": false,
      "use_wenet": false,
      "use_mert": false,
      "whisper_dim": 1024,
      "contentvec_dim": 256,
      "mert_dim": 256,
      "wenet_dim": 512,
      "content_encoder_dim": 384,

      "output_singer_dim": 384,
      "singer_table_size": 512,
      "use_spkid": true
    },
    "diffusion": {
      "scheduler": "ddpm",
      "scheduler_settings": {
        "num_train_timesteps": 1000,
        "beta_start": 1.0e-4,
        "beta_end": 0.02,
        "beta_schedule": "linear"
      },

      "step_encoder": {
        "dim_raw_embedding": 128,
        "dim_hidden_layer": 512,
        "activation": "SiLU",
        "num_layer": 2,
        "max_period": 10000
      },

      "model_type": "bidilconv",

      "bidilconv": {
        "base_channel": 384,
        "n_res_block": 20,
        "conv_kernel_size": 3,
        "dilation_cycle_length": 4,

        "conditioner_size": 384
      },
      "unet2d": {
        "in_channels": 1,
        "out_channels": 1,
        "down_block_types": [
          "CrossAttnDownBlock2D",
          "CrossAttnDownBlock2D",
          "CrossAttnDownBlock2D",
          "DownBlock2D"
        ],
        "mid_block_type": "UNetMidBlock2DCrossAttn",
        "up_block_types": [
          "UpBlock2D",
          "CrossAttnUpBlock2D",
          "CrossAttnUpBlock2D",
          "CrossAttnUpBlock2D"
        ],
        "only_cross_attention": false
      }
    }
  },
  "train": {

    "batch_size": 64,
    "gradient_accumulation_step": 1,
    "max_epoch": -1,

    "save_checkpoint_stride": [
      5,
      20
    ],

    "keep_last": [
      3,
      -1
    ],

    "run_eval": [
      false,
      true
    ],

    "random_seed": 10086,

    "sampler": {
      "holistic_shuffle": true,
      "drop_last": true
    },

    "dataloader": {
      "num_worker": 32,
      "pin_memory": true
    },

    "tracker": [
      "tensorboard"
    ],

    "optimizer": "AdamW",
    "adamw": {
      "lr": 4.0e-4
    },

    "scheduler": "ReduceLROnPlateau",
    "reducelronplateau": {
      "factor": 0.8,
      "patience": 10,

      "min_lr": 1.0e-4
    }
  },
  "inference": {
    "diffusion": {
      "scheduler": "pndm",
      "scheduler_settings": {
        "num_inference_timesteps": 1000
      }
    }
  }
}