Upload 4 files
Browse files
- all_base.json +62 -0
- all_base.onnx +3 -0
- all_base_f16.onnx +3 -0
- inference_ms_cpu.py +18 -9
all_base.json
ADDED
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"train": {
|
3 |
+
"log_interval": 1000,
|
4 |
+
"eval_interval": 5000,
|
5 |
+
"seed": 1234,
|
6 |
+
"epochs": 32,
|
7 |
+
"learning_rate": 1e-4,
|
8 |
+
"betas": [0.8, 0.99],
|
9 |
+
"eps": 1e-9,
|
10 |
+
"batch_size": 16,
|
11 |
+
"fp16_run": true,
|
12 |
+
"lr_decay": 0.999875,
|
13 |
+
"segment_size": 8192,
|
14 |
+
"init_lr_ratio": 1,
|
15 |
+
"warmup_epochs": 0,
|
16 |
+
"c_mel": 45,
|
17 |
+
"c_kl": 1.0
|
18 |
+
},
|
19 |
+
"data": {
|
20 |
+
"use_mel_posterior_encoder": true,
|
21 |
+
"training_files":"filelists/all_train.txt.cleaned",
|
22 |
+
"validation_files":"filelists/all_valid.txt.cleaned",
|
23 |
+
"text_cleaners":["canvers_ko_cleaners"],
|
24 |
+
"max_wav_value": 32768.0,
|
25 |
+
"sampling_rate": 22050,
|
26 |
+
"filter_length": 1024,
|
27 |
+
"hop_length": 256,
|
28 |
+
"win_length": 1024,
|
29 |
+
"n_mel_channels": 80,
|
30 |
+
"mel_fmin": 0.0,
|
31 |
+
"mel_fmax": null,
|
32 |
+
"add_blank": false,
|
33 |
+
"n_speakers": 66,
|
34 |
+
"cleaned_text": true
|
35 |
+
},
|
36 |
+
"model": {
|
37 |
+
"use_mel_posterior_encoder": true,
|
38 |
+
"use_transformer_flows": true,
|
39 |
+
"transformer_flow_type": "pre_conv",
|
40 |
+
"use_spk_conditioned_encoder": true,
|
41 |
+
"use_noise_scaled_mas": true,
|
42 |
+
"use_duration_discriminator": true,
|
43 |
+
"inter_channels": 192,
|
44 |
+
"hidden_channels": 192,
|
45 |
+
"filter_channels": 768,
|
46 |
+
"n_heads": 2,
|
47 |
+
"n_layers": 6,
|
48 |
+
"kernel_size": 3,
|
49 |
+
"p_dropout": 0.1,
|
50 |
+
"resblock": "1",
|
51 |
+
"resblock_kernel_sizes": [3,7,11],
|
52 |
+
"resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
|
53 |
+
"upsample_rates": [8,8,2,2],
|
54 |
+
"upsample_initial_channel": 512,
|
55 |
+
"upsample_kernel_sizes": [16,16,4,4],
|
56 |
+
"n_layers_q": 3,
|
57 |
+
"use_spectral_norm": false,
|
58 |
+
"use_sdp": false,
|
59 |
+
"gin_channels": 256
|
60 |
+
}
|
61 |
+
}
|
62 |
+
|
all_base.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2aa95fa4b3cb25f3099ddb437b44d74c702ee150809f9f1fa6d24c5b4858865a
|
3 |
+
size 123513875
|
all_base_f16.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d44866b44c06e280e792002b87c3e152ec79132c22100f7c63ab1aa2c5853f1
|
3 |
+
size 62511742
|
inference_ms_cpu.py
CHANGED
@@ -19,15 +19,23 @@ def get_text(text, hps):
|
|
19 |
text_norm = torch.LongTensor(text_norm)
|
20 |
return text_norm
|
21 |
|
22 |
-
LANG = '
|
23 |
CONFIG_PATH = f"./configs/{LANG}_base.json"
|
24 |
-
MODEL_PATH = f"./logs/{LANG}_base/
|
25 |
-
|
26 |
-
|
27 |
-
TEXT = "
|
28 |
-
#TEXT = "
|
29 |
-
#
|
30 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
hps = utils.get_hparams_from_file(CONFIG_PATH)
|
33 |
|
@@ -54,6 +62,7 @@ _ = net_g.eval()
|
|
54 |
|
55 |
_ = utils.load_checkpoint(MODEL_PATH, net_g, None)
|
56 |
|
|
|
57 |
stn_tst = get_text(TEXT, hps)
|
58 |
|
59 |
with torch.no_grad():
|
@@ -77,4 +86,4 @@ with torch.no_grad():
|
|
77 |
.numpy()
|
78 |
)
|
79 |
print(i, time.time() - start)
|
80 |
-
write(data=audio, rate=hps.data.sampling_rate, filename=f"
|
|
|
19 |
text_norm = torch.LongTensor(text_norm)
|
20 |
return text_norm
|
21 |
|
22 |
+
LANG = 'all'
|
23 |
CONFIG_PATH = f"./configs/{LANG}_base.json"
|
24 |
+
MODEL_PATH = f"./logs/{LANG}_base/G_250000.pth"
|
25 |
+
|
26 |
+
|
27 |
+
#TEXT = "こんにちは。韓国のロボットを見に日本からここまで来てくれたのに苦労しました。日本語も上手ですか?"
|
28 |
+
#TEXT = "Ciao. È una giornata davvero calda. piacere di conoscerti"
|
29 |
+
#CONFIG_PATH = f"./configs/{LANG}_base.json"
|
30 |
+
#MODEL_PATH = f"./logs/{LANG}_base/G_210000.pth"
|
31 |
+
#TEXT = "안녕하세요. 저는 서큘러스의 인공지능 파이온 입니다. 앞으로 로봇시대를 이끌어 나가도록 하겠습니다!"
|
32 |
+
#TEXT= "你好。 我是 Pion,Circulus 的人工智能。让我们引领未来机器人时代!"
|
33 |
+
TEXT = "I am artificial intelligent voice made by circulus. It is the way."
|
34 |
+
#TEXT = f"Hola. encantado de conocerlo ¿Qué estás haciendo?"
|
35 |
+
#TEXT = "नमस्ते। मेरा नाम पिबो है, सर्कुलस का रोबोट। आपसे मिलकर अच्छा लगा"
|
36 |
+
#SPK_ID = 45
|
37 |
+
#SPK_ID = 20
|
38 |
+
#OUTPUT_WAV_PATH = "vits_test"
|
39 |
|
40 |
hps = utils.get_hparams_from_file(CONFIG_PATH)
|
41 |
|
|
|
62 |
|
63 |
_ = utils.load_checkpoint(MODEL_PATH, net_g, None)
|
64 |
|
65 |
+
hps.data.text_cleaners = ["canvers_en_cleaners"]
|
66 |
stn_tst = get_text(TEXT, hps)
|
67 |
|
68 |
with torch.no_grad():
|
|
|
86 |
.numpy()
|
87 |
)
|
88 |
print(i, time.time() - start)
|
89 |
+
write(data=audio, rate=hps.data.sampling_rate, filename=f"t_{LANG}_{i}.wav")
|