TylorShine committed on
Commit
ea1f21b
1 Parent(s): c85f28b

Upload 3 files

Browse files
Files changed (3) hide show
  1. config.yaml +74 -0
  2. model_0.pt +3 -0
  3. spk_info.npz +3 -0
config.yaml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data:
2
+ block_size: 512
3
+ dataset_path: ../datasets/vctk-partial
4
+ duration: 1.8
5
+ encoder: dpwavlmbase
6
+ encoder_ckpt: models/pretrained/dphubert/DPWavLM-sp0.75.pth
7
+ encoder_hop_size: 320
8
+ encoder_out_channels: 768
9
+ encoder_sample_rate: 16000
10
+ extensions:
11
+ - wav
12
+ f0_extractor: rmvpe
13
+ f0_max: 1200
14
+ f0_min: 65
15
+ sampling_rate: 44100
16
+ spk_embed_channels: 256
17
+ spk_embed_encoder: pyannote.audio
18
+ spk_embed_encoder_ckpt: ./models/pretrained/pyannote.audio/wespeaker-voxceleb-resnet34-LM/pytorch_model.bin
19
+ spk_embed_encoder_sample_rate: 16000
20
+ volume_window_size: 8
21
+ device: cuda
22
+ env:
23
+ expdir: ../datasets/exp/vctk-partial
24
+ gpu_id: 0
25
+ loss:
26
+ beta: 0.8
27
+ fft_max: 2048
28
+ fft_min: 256
29
+ n_scale: 4
30
+ overlap: 0.5
31
+ use_dual_scale: false
32
+ use_dual_scale_log_freq: true
33
+ model:
34
+ f0_input_variance: 0.0
35
+ f0_offset_size_downsamples: 8
36
+ harmonic_env_size_downsamples: 8
37
+ no_use_embed_conv: false
38
+ noise_env_size_downsamples: 8
39
+ noise_seed: 289
40
+ noise_to_harmonic_phase: true
41
+ type: CombSubMinimumNoisedPhase
42
+ units_hidden_channels: 256
43
+ units_layers:
44
+ - - 10
45
+ - 11
46
+ use_f0_offset: true
47
+ use_harmonic_env: false
48
+ use_noise_env: true
49
+ use_speaker_embed: true
50
+ win_length: 2048
51
+ train:
52
+ amp_dtype: fp32
53
+ batch_size: 48
54
+ cache_all_data: true
55
+ cache_device: cuda
56
+ cache_fp16: true
57
+ epochs: 50000
58
+ frame_hop_random_max: 64
59
+ frame_hop_random_min: 32
60
+ interval_log: 10
61
+ interval_val: 2000
62
+ loss_variation: 0.1
63
+ low_similar_loss_variation: 0.7
64
+ lr: 0.0005
65
+ num_workers: 2
66
+ only_u2c_stack: false
67
+ save_opt: false
68
+ sched_cooldown: 2
69
+ sched_factor: 0.5
70
+ sched_min_lr: 3.0e-06
71
+ sched_patience: 30
72
+ sched_threshold: 1.0e-05
73
+ sched_threshold_mode: rel
74
+ weight_decay: 0
model_0.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f967f9047c1bbc68bef4f9919745309d0800603de73e427827487adc0d3cf0d
3
+ size 9591794
spk_info.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bb67178e37355c9efa7b8ecf8611bb365da4248080d2249756e863767bb97e9
3
+ size 153545