drewThomasson committed on
Commit
27fcdf5
1 Parent(s): 0d8f467

Upload 8 files

Browse files
tts_models/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tts_models/voice_conversion_models--multilingual--vctk--freevc24/.DS_Store ADDED
Binary file (6.15 kB). View file
 
tts_models/voice_conversion_models--multilingual--vctk--freevc24/._config.json ADDED
Binary file (386 Bytes). View file
 
tts_models/voice_conversion_models--multilingual--vctk--freevc24/._model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fa468ed77a9726751b4d321242e069c77dbcd8ecb2e30a212dc0f38f69b852a
3
+ size 230
tts_models/voice_conversion_models--multilingual--vctk--freevc24/._voice_conversion_models--multilingual--vctk--freevc24 ADDED
Binary file (330 Bytes). View file
 
tts_models/voice_conversion_models--multilingual--vctk--freevc24/config.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "output",
3
+ "logger_uri": null,
4
+ "run_name": "run",
5
+ "project_name": null,
6
+ "run_description": "\ud83d\udc38Coqui trainer run.",
7
+ "print_step": 25,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "tensorboard",
12
+ "log_model_step": null,
13
+ "save_step": 10000,
14
+ "save_n_checkpoints": 5,
15
+ "save_checkpoints": true,
16
+ "save_all_best": false,
17
+ "save_best_after": 10000,
18
+ "target_loss": null,
19
+ "print_eval": false,
20
+ "test_delay_epochs": 0,
21
+ "run_eval": true,
22
+ "run_eval_steps": null,
23
+ "distributed_backend": "nccl",
24
+ "distributed_url": "tcp://localhost:54321",
25
+ "mixed_precision": false,
26
+ "epochs": 1000,
27
+ "batch_size": 32,
28
+ "eval_batch_size": 16,
29
+ "grad_clip": [
30
+ 1000,
31
+ 1000
32
+ ],
33
+ "scheduler_after_epoch": true,
34
+ "lr": 0.001,
35
+ "optimizer": "AdamW",
36
+ "optimizer_params": {
37
+ "betas": [
38
+ 0.8,
39
+ 0.99
40
+ ],
41
+ "eps": 1e-09,
42
+ "weight_decay": 0.01
43
+ },
44
+ "lr_scheduler": null,
45
+ "lr_scheduler_params": {},
46
+ "use_grad_scaler": false,
47
+ "cudnn_enable": true,
48
+ "cudnn_deterministic": false,
49
+ "cudnn_benchmark": false,
50
+ "training_seed": 54321,
51
+ "model": "freevc",
52
+ "num_loader_workers": 0,
53
+ "num_eval_loader_workers": 0,
54
+ "use_noise_augment": false,
55
+ "audio": {
56
+ "max_wav_value": 32768.0,
57
+ "input_sample_rate": 16000,
58
+ "output_sample_rate": 24000,
59
+ "filter_length": 1280,
60
+ "hop_length": 320,
61
+ "win_length": 1280,
62
+ "n_mel_channels": 80,
63
+ "mel_fmin": 0.0,
64
+ "mel_fmax": null
65
+ },
66
+ "batch_group_size": 0,
67
+ "loss_masking": null,
68
+ "min_audio_len": 1,
69
+ "max_audio_len": Infinity,
70
+ "min_text_len": 1,
71
+ "max_text_len": Infinity,
72
+ "compute_f0": false,
73
+ "compute_energy": false,
74
+ "compute_linear_spec": true,
75
+ "precompute_num_workers": 0,
76
+ "start_by_longest": false,
77
+ "shuffle": false,
78
+ "drop_last": false,
79
+ "datasets": [
80
+ {
81
+ "formatter": "",
82
+ "dataset_name": "",
83
+ "path": "",
84
+ "meta_file_train": "",
85
+ "ignored_speakers": null,
86
+ "language": "",
87
+ "phonemizer": "",
88
+ "meta_file_val": "",
89
+ "meta_file_attn_mask": ""
90
+ }
91
+ ],
92
+ "test_sentences": [
93
+ [
94
+ "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent."
95
+ ],
96
+ [
97
+ "Be a voice, not an echo."
98
+ ],
99
+ [
100
+ "I'm sorry Dave. I'm afraid I can't do that."
101
+ ],
102
+ [
103
+ "This cake is great. It's so delicious and moist."
104
+ ],
105
+ [
106
+ "Prior to November 22, 1963."
107
+ ]
108
+ ],
109
+ "eval_split_max_size": null,
110
+ "eval_split_size": 0.01,
111
+ "use_speaker_weighted_sampler": false,
112
+ "speaker_weighted_sampler_alpha": 1.0,
113
+ "use_language_weighted_sampler": false,
114
+ "language_weighted_sampler_alpha": 1.0,
115
+ "use_length_weighted_sampler": false,
116
+ "length_weighted_sampler_alpha": 1.0,
117
+ "model_args": {
118
+ "spec_channels": 641,
119
+ "inter_channels": 192,
120
+ "hidden_channels": 192,
121
+ "filter_channels": 768,
122
+ "n_heads": 2,
123
+ "n_layers": 6,
124
+ "kernel_size": 3,
125
+ "p_dropout": 0.1,
126
+ "resblock": "1",
127
+ "resblock_kernel_sizes": [
128
+ 3,
129
+ 7,
130
+ 11
131
+ ],
132
+ "resblock_dilation_sizes": [
133
+ [
134
+ 1,
135
+ 3,
136
+ 5
137
+ ],
138
+ [
139
+ 1,
140
+ 3,
141
+ 5
142
+ ],
143
+ [
144
+ 1,
145
+ 3,
146
+ 5
147
+ ]
148
+ ],
149
+ "upsample_rates": [
150
+ 10,
151
+ 6,
152
+ 4,
153
+ 2
154
+ ],
155
+ "upsample_initial_channel": 512,
156
+ "upsample_kernel_sizes": [
157
+ 16,
158
+ 16,
159
+ 4,
160
+ 4
161
+ ],
162
+ "n_layers_q": 3,
163
+ "use_spectral_norm": false,
164
+ "gin_channels": 256,
165
+ "ssl_dim": 1024,
166
+ "use_spk": true,
167
+ "num_spks": 0,
168
+ "segment_size": 8960
169
+ },
170
+ "lr_gen": 0.0002,
171
+ "lr_disc": 0.0002,
172
+ "lr_scheduler_gen": "ExponentialLR",
173
+ "lr_scheduler_gen_params": {
174
+ "gamma": 0.999875,
175
+ "last_epoch": -1
176
+ },
177
+ "lr_scheduler_disc": "ExponentialLR",
178
+ "lr_scheduler_disc_params": {
179
+ "gamma": 0.999875,
180
+ "last_epoch": -1
181
+ },
182
+ "kl_loss_alpha": 1.0,
183
+ "disc_loss_alpha": 1.0,
184
+ "gen_loss_alpha": 1.0,
185
+ "feat_loss_alpha": 1.0,
186
+ "mel_loss_alpha": 45.0,
187
+ "dur_loss_alpha": 1.0,
188
+ "speaker_encoder_loss_alpha": 1.0,
189
+ "return_wav": true,
190
+ "use_weighted_sampler": false,
191
+ "weighted_sampler_attrs": {},
192
+ "weighted_sampler_multipliers": {},
193
+ "r": 1,
194
+ "add_blank": true,
195
+ "num_speakers": 0,
196
+ "use_speaker_embedding": false,
197
+ "speakers_file": null,
198
+ "speaker_embedding_channels": 256,
199
+ "language_ids_file": null,
200
+ "use_language_embedding": false,
201
+ "use_d_vector_file": false,
202
+ "d_vector_file": null,
203
+ "d_vector_dim": null
204
+ }
tts_models/voice_conversion_models--multilingual--vctk--freevc24/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18d4ce44e7c803d675be1984b174e0f7bf05ce937419f19a818877e83f197007
3
+ size 1425242419
tts_models/wavlm/WavLM-Large.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb4b3c3e6aa567f0a997b30855859cb81528ee8078802af439f7b2da0bf100f
3
+ size 1261965425