rippertnt committed on
Commit
f519fc8
·
verified ·
1 Parent(s): 4ed276b

Upload 4 files

Browse files
Files changed (4) hide show
  1. all_base.json +62 -0
  2. all_base.onnx +3 -0
  3. all_base_f16.onnx +3 -0
  4. inference_ms_cpu.py +18 -9
all_base.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "train": {
3
+ "log_interval": 1000,
4
+ "eval_interval": 5000,
5
+ "seed": 1234,
6
+ "epochs": 32,
7
+ "learning_rate": 1e-4,
8
+ "betas": [0.8, 0.99],
9
+ "eps": 1e-9,
10
+ "batch_size": 16,
11
+ "fp16_run": true,
12
+ "lr_decay": 0.999875,
13
+ "segment_size": 8192,
14
+ "init_lr_ratio": 1,
15
+ "warmup_epochs": 0,
16
+ "c_mel": 45,
17
+ "c_kl": 1.0
18
+ },
19
+ "data": {
20
+ "use_mel_posterior_encoder": true,
21
+ "training_files":"filelists/all_train.txt.cleaned",
22
+ "validation_files":"filelists/all_valid.txt.cleaned",
23
+ "text_cleaners":["canvers_ko_cleaners"],
24
+ "max_wav_value": 32768.0,
25
+ "sampling_rate": 22050,
26
+ "filter_length": 1024,
27
+ "hop_length": 256,
28
+ "win_length": 1024,
29
+ "n_mel_channels": 80,
30
+ "mel_fmin": 0.0,
31
+ "mel_fmax": null,
32
+ "add_blank": false,
33
+ "n_speakers": 66,
34
+ "cleaned_text": true
35
+ },
36
+ "model": {
37
+ "use_mel_posterior_encoder": true,
38
+ "use_transformer_flows": true,
39
+ "transformer_flow_type": "pre_conv",
40
+ "use_spk_conditioned_encoder": true,
41
+ "use_noise_scaled_mas": true,
42
+ "use_duration_discriminator": true,
43
+ "inter_channels": 192,
44
+ "hidden_channels": 192,
45
+ "filter_channels": 768,
46
+ "n_heads": 2,
47
+ "n_layers": 6,
48
+ "kernel_size": 3,
49
+ "p_dropout": 0.1,
50
+ "resblock": "1",
51
+ "resblock_kernel_sizes": [3,7,11],
52
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
53
+ "upsample_rates": [8,8,2,2],
54
+ "upsample_initial_channel": 512,
55
+ "upsample_kernel_sizes": [16,16,4,4],
56
+ "n_layers_q": 3,
57
+ "use_spectral_norm": false,
58
+ "use_sdp": false,
59
+ "gin_channels": 256
60
+ }
61
+ }
62
+
all_base.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aa95fa4b3cb25f3099ddb437b44d74c702ee150809f9f1fa6d24c5b4858865a
3
+ size 123513875
all_base_f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d44866b44c06e280e792002b87c3e152ec79132c22100f7c63ab1aa2c5853f1
3
+ size 62511742
inference_ms_cpu.py CHANGED
@@ -19,15 +19,23 @@ def get_text(text, hps):
19
  text_norm = torch.LongTensor(text_norm)
20
  return text_norm
21
 
22
- LANG = 'ru'
23
  CONFIG_PATH = f"./configs/{LANG}_base.json"
24
- MODEL_PATH = f"./logs/{LANG}_base/G_40000.pth"
25
- #TEXT = "I am artificial intelligent voice made by circulus."
26
- #TEXT = "저는 서큘러스의 AI Voice 모델입니다. 오늘도 즐거운하루 보내세요."
27
- TEXT = "привет. Я президент Путин, и мне нравятся советские лидеры Сталин и Ленин."
28
- #TEXT = "Xin chào. Tôi Tổng thống Putin tôi thích các nhà lãnh đạo Liên Xô Stalin và Lenin."
29
- #TEXT = "สวัสดี. ผมเป็นประธานาธิบดีปูติน และผมชอบผู้นำโซเวียตอย่างสตาลินและเลนิน"
30
- #TEXT = "Halo. Saya Presiden Putin, dan saya menyukai pemimpin Soviet Stalin dan Lenin."
 
 
 
 
 
 
 
 
31
 
32
  hps = utils.get_hparams_from_file(CONFIG_PATH)
33
 
@@ -54,6 +62,7 @@ _ = net_g.eval()
54
 
55
  _ = utils.load_checkpoint(MODEL_PATH, net_g, None)
56
 
 
57
  stn_tst = get_text(TEXT, hps)
58
 
59
  with torch.no_grad():
@@ -77,4 +86,4 @@ with torch.no_grad():
77
  .numpy()
78
  )
79
  print(i, time.time() - start)
80
- write(data=audio, rate=hps.data.sampling_rate, filename=f"test_{LANG}_{i}.wav")
 
19
  text_norm = torch.LongTensor(text_norm)
20
  return text_norm
21
 
22
+ LANG = 'all'
23
  CONFIG_PATH = f"./configs/{LANG}_base.json"
24
+ MODEL_PATH = f"./logs/{LANG}_base/G_250000.pth"
25
+
26
+
27
+ #TEXT = "こんにちは。韓国のロボットを見に日本からここまで来てくれたのに苦労しました。日本語も上手ですか?"
28
+ #TEXT = "Ciao. È una giornata davvero calda. piacere di conoscerti"
29
+ #CONFIG_PATH = f"./configs/{LANG}_base.json"
30
+ #MODEL_PATH = f"./logs/{LANG}_base/G_210000.pth"
31
+ #TEXT = "안녕하세요. 저는 서큘러스의 인공지능 파이온 입니다. 앞으로 로봇시대를 이끌어 나가도록 하겠습니다!"
32
+ #TEXT= "你好。 我是 Pion,Circulus 的人工智能。让我们引领未来机器人时代!"
33
+ TEXT = "I am artificial intelligent voice made by circulus. It is the way."
34
+ #TEXT = f"Hola. encantado de conocerlo ¿Qué estás haciendo?"
35
+ #TEXT = "नमस्ते। मेरा नाम पिबो है, सर्कुलस का रोबोट। आपसे मिलकर अच्छा लगा"
36
+ #SPK_ID = 45
37
+ #SPK_ID = 20
38
+ #OUTPUT_WAV_PATH = "vits_test"
39
 
40
  hps = utils.get_hparams_from_file(CONFIG_PATH)
41
 
 
62
 
63
  _ = utils.load_checkpoint(MODEL_PATH, net_g, None)
64
 
65
+ hps.data.text_cleaners = ["canvers_en_cleaners"]
66
  stn_tst = get_text(TEXT, hps)
67
 
68
  with torch.no_grad():
 
86
  .numpy()
87
  )
88
  print(i, time.time() - start)
89
+ write(data=audio, rate=hps.data.sampling_rate, filename=f"t_{LANG}_{i}.wav")