NeonBohdan
committed on
Commit
•
5c12205
1
Parent(s):
3e57ccb
Added ukr-ru config
Browse files - config.json +34 -21
config.json
CHANGED
@@ -43,11 +43,14 @@
|
|
43 |
"lr_scheduler": "",
|
44 |
"lr_scheduler_params": {},
|
45 |
"use_grad_scaler": false,
|
|
|
|
|
|
|
|
|
46 |
"model": "vits",
|
47 |
"num_loader_workers": 8,
|
48 |
"num_eval_loader_workers": 8,
|
49 |
"use_noise_augment": false,
|
50 |
-
"use_language_weighted_sampler": true,
|
51 |
"audio": {
|
52 |
"fft_size": 1024,
|
53 |
"win_length": 1024,
|
@@ -61,8 +64,8 @@
|
|
61 |
"ref_level_db": 20,
|
62 |
"do_sound_norm": false,
|
63 |
"log_func": "np.log",
|
64 |
-
"do_trim_silence":
|
65 |
-
"trim_db":
|
66 |
"do_rms_norm": false,
|
67 |
"db_level": null,
|
68 |
"power": 1.5,
|
@@ -75,7 +78,7 @@
|
|
75 |
"do_amp_to_db_mel": true,
|
76 |
"pitch_fmax": 640.0,
|
77 |
"pitch_fmin": 0.0,
|
78 |
-
"signal_norm":
|
79 |
"min_level_db": -100,
|
80 |
"symmetric_norm": true,
|
81 |
"max_norm": 4.0,
|
@@ -97,7 +100,7 @@
|
|
97 |
"eos": "<EOS>",
|
98 |
"bos": "<BOS>",
|
99 |
"blank": "<BLNK>",
|
100 |
-
"characters": "
|
101 |
"punctuations": "!'(),-.:;? ",
|
102 |
"phonemes": null,
|
103 |
"is_unique": true,
|
@@ -108,51 +111,61 @@
|
|
108 |
"loss_masking": null,
|
109 |
"sort_by_audio_len": true,
|
110 |
"min_audio_len": 32768,
|
111 |
-
"max_audio_len":
|
112 |
"min_text_len": 1,
|
113 |
"max_text_len": Infinity,
|
114 |
"compute_f0": false,
|
115 |
"compute_linear_spec": true,
|
116 |
-
"precompute_num_workers":
|
117 |
"start_by_longest": false,
|
118 |
"datasets": [
|
119 |
{
|
120 |
"name": "mailabs",
|
121 |
-
"path": "",
|
122 |
"meta_file_train": "",
|
123 |
-
"ignored_speakers":
|
|
|
|
|
|
|
124 |
"language": "uk",
|
125 |
"meta_file_val": "",
|
126 |
"meta_file_attn_mask": ""
|
127 |
},
|
128 |
{
|
129 |
"name": "mailabs",
|
130 |
-
"path": "",
|
131 |
"meta_file_train": "",
|
132 |
-
"ignored_speakers":
|
133 |
-
|
|
|
|
|
|
|
134 |
"meta_file_val": "",
|
135 |
"meta_file_attn_mask": ""
|
136 |
}
|
137 |
],
|
138 |
"test_sentences": [
|
139 |
[
|
140 |
-
"\
|
141 |
"sumska",
|
142 |
null,
|
143 |
"uk"
|
144 |
],
|
145 |
[
|
146 |
-
"
|
147 |
-
"
|
148 |
null,
|
149 |
-
"
|
150 |
]
|
151 |
],
|
152 |
"eval_split_max_size": null,
|
153 |
"eval_split_size": 0.01,
|
|
|
|
|
|
|
|
|
154 |
"model_args": {
|
155 |
-
"num_chars":
|
156 |
"out_channels": 513,
|
157 |
"spec_segment_size": 32,
|
158 |
"hidden_channels": 192,
|
@@ -168,7 +181,7 @@
|
|
168 |
"kernel_size_flow": 5,
|
169 |
"dilation_rate_flow": 1,
|
170 |
"num_layers_flow": 4,
|
171 |
-
"resblock_type_decoder": "
|
172 |
"resblock_kernel_sizes_decoder": [
|
173 |
3,
|
174 |
7,
|
@@ -204,7 +217,7 @@
|
|
204 |
4,
|
205 |
4
|
206 |
],
|
207 |
-
"use_sdp":
|
208 |
"noise_scale": 1.0,
|
209 |
"inference_noise_scale": 0.667,
|
210 |
"length_scale": 1,
|
@@ -222,7 +235,7 @@
|
|
222 |
"d_vector_dim": 0,
|
223 |
"detach_dp_input": true,
|
224 |
"use_language_embedding": true,
|
225 |
-
"embedded_language_dim":
|
226 |
"num_languages": 2,
|
227 |
"language_ids_file": null,
|
228 |
"use_speaker_encoder_as_loss": false,
|
@@ -256,7 +269,7 @@
|
|
256 |
"speaker_encoder_loss_alpha": 1.0,
|
257 |
"return_wav": true,
|
258 |
"r": 1,
|
259 |
-
"num_speakers":
|
260 |
"use_speaker_embedding": true,
|
261 |
"speakers_file": null,
|
262 |
"speaker_embedding_channels": 256,
|
|
|
43 |
"lr_scheduler": "",
|
44 |
"lr_scheduler_params": {},
|
45 |
"use_grad_scaler": false,
|
46 |
+
"cudnn_enable": true,
|
47 |
+
"cudnn_deterministic": false,
|
48 |
+
"cudnn_benchmark": true,
|
49 |
+
"training_seed": 54321,
|
50 |
"model": "vits",
|
51 |
"num_loader_workers": 8,
|
52 |
"num_eval_loader_workers": 8,
|
53 |
"use_noise_augment": false,
|
|
|
54 |
"audio": {
|
55 |
"fft_size": 1024,
|
56 |
"win_length": 1024,
|
|
|
64 |
"ref_level_db": 20,
|
65 |
"do_sound_norm": false,
|
66 |
"log_func": "np.log",
|
67 |
+
"do_trim_silence": true,
|
68 |
+
"trim_db": 45,
|
69 |
"do_rms_norm": false,
|
70 |
"db_level": null,
|
71 |
"power": 1.5,
|
|
|
78 |
"do_amp_to_db_mel": true,
|
79 |
"pitch_fmax": 640.0,
|
80 |
"pitch_fmin": 0.0,
|
81 |
+
"signal_norm": false,
|
82 |
"min_level_db": -100,
|
83 |
"symmetric_norm": true,
|
84 |
"max_norm": 4.0,
|
|
|
100 |
"eos": "<EOS>",
|
101 |
"bos": "<BOS>",
|
102 |
"blank": "<BLNK>",
|
103 |
+
"characters": "\u0430\u0431\u0432\u0433\u0491\u0434\u0435\u0454\u0436\u0437\u0438\u0456\u0457\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044c\u044e\u044f\u044d\u0451\u044b\u044a",
|
104 |
"punctuations": "!'(),-.:;? ",
|
105 |
"phonemes": null,
|
106 |
"is_unique": true,
|
|
|
111 |
"loss_masking": null,
|
112 |
"sort_by_audio_len": true,
|
113 |
"min_audio_len": 32768,
|
114 |
+
"max_audio_len": 224000,
|
115 |
"min_text_len": 1,
|
116 |
"max_text_len": Infinity,
|
117 |
"compute_f0": false,
|
118 |
"compute_linear_spec": true,
|
119 |
+
"precompute_num_workers": 12,
|
120 |
"start_by_longest": false,
|
121 |
"datasets": [
|
122 |
{
|
123 |
"name": "mailabs",
|
124 |
+
"path": "./logs/uk_UK",
|
125 |
"meta_file_train": "",
|
126 |
+
"ignored_speakers": [
|
127 |
+
"obruchov",
|
128 |
+
"shepel"
|
129 |
+
],
|
130 |
"language": "uk",
|
131 |
"meta_file_val": "",
|
132 |
"meta_file_attn_mask": ""
|
133 |
},
|
134 |
{
|
135 |
"name": "mailabs",
|
136 |
+
"path": "./logs/ru_RU",
|
137 |
"meta_file_train": "",
|
138 |
+
"ignored_speakers": [
|
139 |
+
"minaev",
|
140 |
+
"nikolaev"
|
141 |
+
],
|
142 |
+
"language": "ru",
|
143 |
"meta_file_val": "",
|
144 |
"meta_file_attn_mask": ""
|
145 |
}
|
146 |
],
|
147 |
"test_sentences": [
|
148 |
[
|
149 |
+
"\u0412\u0435\u0441\u0435\u043b\u043a\u0430, \u0442\u0430\u043a\u043e\u0436 \u0440\u0430\u0439\u0434\u0443\u0433\u0430 \u043e\u043f\u0442\u0438\u0447\u043d\u0435 \u044f\u0432\u0438\u0449\u0435 \u0432 \u0430\u0442\u043c\u043e\u0441\u0444\u0435\u0440\u0456, \u0449\u043e \u044f\u0432\u043b\u044f\u0454 \u0441\u043e\u0431\u043e\u044e \u043e\u0434\u043d\u0443, \u0434\u0432\u0456 \u0447\u0438 \u0434\u0435\u043a\u0456\u043b\u044c\u043a\u0430 \u0440\u0456\u0437\u043d\u043e\u043a\u043e\u043b\u044c\u043e\u0440\u043e\u0432\u0438\u0445 \u0434\u0443\u0433.",
|
150 |
"sumska",
|
151 |
null,
|
152 |
"uk"
|
153 |
],
|
154 |
[
|
155 |
+
"\u0420\u0430\u0434\u0443\u0433\u0430, \u0430\u0442\u043c\u043e\u0441\u0444\u0435\u0440\u043d\u043e\u0435, \u043e\u043f\u0442\u0438\u0447\u0435\u0441\u043a\u043e\u0435 \u0438 \u043c\u0435\u0442\u0435\u043e\u0440\u043e\u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a\u043e\u0435 \u044f\u0432\u043b\u0435\u043d\u0438\u0435, \u043d\u0430\u0431\u043b\u044e\u0434\u0430\u0435\u043c\u043e\u0435 \u043f\u0440\u0438 \u043e\u0441\u0432\u0435\u0449\u0435\u043d\u0438\u0438 \u044f\u0440\u043a\u0438\u043c \u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a\u043e\u043c \u0441\u0432\u0435\u0442\u0430.",
|
156 |
+
"hajdurova",
|
157 |
null,
|
158 |
+
"ru"
|
159 |
]
|
160 |
],
|
161 |
"eval_split_max_size": null,
|
162 |
"eval_split_size": 0.01,
|
163 |
+
"use_speaker_weighted_sampler": false,
|
164 |
+
"speaker_weighted_sampler_alpha": 1.0,
|
165 |
+
"use_language_weighted_sampler": true,
|
166 |
+
"language_weighted_sampler_alpha": 1.0,
|
167 |
"model_args": {
|
168 |
+
"num_chars": 52,
|
169 |
"out_channels": 513,
|
170 |
"spec_segment_size": 32,
|
171 |
"hidden_channels": 192,
|
|
|
181 |
"kernel_size_flow": 5,
|
182 |
"dilation_rate_flow": 1,
|
183 |
"num_layers_flow": 4,
|
184 |
+
"resblock_type_decoder": "2",
|
185 |
"resblock_kernel_sizes_decoder": [
|
186 |
3,
|
187 |
7,
|
|
|
217 |
4,
|
218 |
4
|
219 |
],
|
220 |
+
"use_sdp": true,
|
221 |
"noise_scale": 1.0,
|
222 |
"inference_noise_scale": 0.667,
|
223 |
"length_scale": 1,
|
|
|
235 |
"d_vector_dim": 0,
|
236 |
"detach_dp_input": true,
|
237 |
"use_language_embedding": true,
|
238 |
+
"embedded_language_dim": 4,
|
239 |
"num_languages": 2,
|
240 |
"language_ids_file": null,
|
241 |
"use_speaker_encoder_as_loss": false,
|
|
|
269 |
"speaker_encoder_loss_alpha": 1.0,
|
270 |
"return_wav": true,
|
271 |
"r": 1,
|
272 |
+
"num_speakers": 2,
|
273 |
"use_speaker_embedding": true,
|
274 |
"speakers_file": null,
|
275 |
"speaker_embedding_channels": 256,
|