Spaces:
Running
Running
Kit-Lemonfoot
committed on
Commit
·
918ef2d
1
Parent(s):
827b7fd
Added some more indies, moved Dramatubers to their own tab, a few fixes
Browse files
app.py
CHANGED
@@ -35,6 +35,9 @@ limitation = os.getenv("SYSTEM") == "spaces"
|
|
35 |
#limitation=True
|
36 |
language_dict = tts_order_voice
|
37 |
|
|
|
|
|
|
|
38 |
audio_mode = []
|
39 |
f0method_mode = []
|
40 |
if limitation is True:
|
@@ -60,8 +63,10 @@ vcArr.append(VC(48000, config))
|
|
60 |
def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, record_button):
|
61 |
try:
|
62 |
#Setup audio
|
|
|
63 |
if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
|
64 |
audio, sr = librosa.load(vc_input, sr=16000, mono=True)
|
|
|
65 |
elif vc_audio_mode == "Upload audio":
|
66 |
if vc_upload is None:
|
67 |
return "Please upload an audio file.", None
|
@@ -81,7 +86,11 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
|
|
81 |
if tts_text is None or tts_voice is None or tts_text=="":
|
82 |
return "You need to enter text and select a voice.", None
|
83 |
voice = language_dict[tts_voice]
|
84 |
-
|
|
|
|
|
|
|
|
|
85 |
try:
|
86 |
audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
|
87 |
except:
|
@@ -103,6 +112,13 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
|
|
103 |
if sampling_rate != 16000:
|
104 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
105 |
tts_text = "Recorded Audio"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
times = [0, 0, 0]
|
107 |
f0_up_key = int(f0_up_key)
|
108 |
|
@@ -187,6 +203,8 @@ def load_model():
|
|
187 |
model_index = f"weights/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
|
188 |
if info['feature_retrieval_library'] == "None":
|
189 |
model_index = None
|
|
|
|
|
190 |
model_path = f"weights/{category_folder}/{character_name}/{model_name}"
|
191 |
cpt = torch.load(f"weights/{category_folder}/{character_name}/{model_name}", map_location="cpu")
|
192 |
model_version = cpt.get("version", "v1")
|
@@ -282,9 +300,6 @@ def change_audio_mode(vc_audio_mode):
|
|
282 |
gr.Audio.update(visible=False),
|
283 |
gr.Audio.update(visible=False),
|
284 |
gr.Audio.update(visible=False),
|
285 |
-
gr.Slider.update(visible=False),
|
286 |
-
gr.Audio.update(visible=False),
|
287 |
-
gr.Button.update(visible=False),
|
288 |
# EdgeTTS
|
289 |
gr.Textbox.update(visible=False),
|
290 |
gr.Dropdown.update(visible=False),
|
@@ -304,9 +319,6 @@ def change_audio_mode(vc_audio_mode):
|
|
304 |
gr.Audio.update(visible=False),
|
305 |
gr.Audio.update(visible=False),
|
306 |
gr.Audio.update(visible=False),
|
307 |
-
gr.Slider.update(visible=False),
|
308 |
-
gr.Audio.update(visible=False),
|
309 |
-
gr.Button.update(visible=False),
|
310 |
# EdgeTTS
|
311 |
gr.Textbox.update(visible=False),
|
312 |
gr.Dropdown.update(visible=False),
|
@@ -326,9 +338,6 @@ def change_audio_mode(vc_audio_mode):
|
|
326 |
gr.Audio.update(visible=True),
|
327 |
gr.Audio.update(visible=True),
|
328 |
gr.Audio.update(visible=True),
|
329 |
-
gr.Slider.update(visible=True),
|
330 |
-
gr.Audio.update(visible=True),
|
331 |
-
gr.Button.update(visible=True),
|
332 |
# TTS
|
333 |
gr.Textbox.update(visible=False),
|
334 |
gr.Dropdown.update(visible=False),
|
@@ -348,9 +357,6 @@ def change_audio_mode(vc_audio_mode):
|
|
348 |
gr.Audio.update(visible=False),
|
349 |
gr.Audio.update(visible=False),
|
350 |
gr.Audio.update(visible=False),
|
351 |
-
gr.Slider.update(visible=False),
|
352 |
-
gr.Audio.update(visible=False),
|
353 |
-
gr.Button.update(visible=False),
|
354 |
# TTS
|
355 |
gr.Textbox.update(visible=True),
|
356 |
gr.Dropdown.update(visible=True),
|
@@ -370,9 +376,6 @@ def change_audio_mode(vc_audio_mode):
|
|
370 |
gr.Audio.update(visible=False),
|
371 |
gr.Audio.update(visible=False),
|
372 |
gr.Audio.update(visible=False),
|
373 |
-
gr.Slider.update(visible=False),
|
374 |
-
gr.Audio.update(visible=False),
|
375 |
-
gr.Button.update(visible=False),
|
376 |
# TTS
|
377 |
gr.Textbox.update(visible=False),
|
378 |
gr.Dropdown.update(visible=False),
|
@@ -392,9 +395,6 @@ def change_audio_mode(vc_audio_mode):
|
|
392 |
gr.Audio.update(visible=False),
|
393 |
gr.Audio.update(visible=False),
|
394 |
gr.Audio.update(visible=False),
|
395 |
-
gr.Slider.update(visible=False),
|
396 |
-
gr.Audio.update(visible=False),
|
397 |
-
gr.Button.update(visible=False),
|
398 |
# TTS
|
399 |
gr.Textbox.update(visible=False, interactive=True),
|
400 |
gr.Dropdown.update(visible=False, interactive=True),
|
@@ -535,19 +535,6 @@ if __name__ == '__main__':
|
|
535 |
],
|
536 |
outputs=[vc_log, vc_output]
|
537 |
)
|
538 |
-
|
539 |
-
vc_volume = gr.Slider(
|
540 |
-
minimum=0,
|
541 |
-
maximum=10,
|
542 |
-
label="Vocal volume",
|
543 |
-
value=4,
|
544 |
-
interactive=True,
|
545 |
-
step=1,
|
546 |
-
info="Adjust vocal volume (Default: 4}",
|
547 |
-
visible=False
|
548 |
-
)
|
549 |
-
vc_combined_output = gr.Audio(label="Output Combined Audio", visible=False)
|
550 |
-
vc_combine = gr.Button("Combine",variant="primary", visible=False)
|
551 |
|
552 |
with gr.Row():
|
553 |
with gr.Column():
|
@@ -582,11 +569,6 @@ if __name__ == '__main__':
|
|
582 |
inputs=[vc_link, vc_download_audio, vc_split_model],
|
583 |
outputs=[vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input]
|
584 |
)
|
585 |
-
vc_combine.click(
|
586 |
-
fn=combine_vocal_and_inst,
|
587 |
-
inputs=[vc_output, vc_volume, vc_split_model],
|
588 |
-
outputs=[vc_combined_output]
|
589 |
-
)
|
590 |
vc_audio_mode.change(
|
591 |
fn=change_audio_mode,
|
592 |
inputs=[vc_audio_mode],
|
@@ -600,20 +582,19 @@ if __name__ == '__main__':
|
|
600 |
vc_vocal_preview,
|
601 |
vc_inst_preview,
|
602 |
vc_audio_preview,
|
603 |
-
|
604 |
-
vc_combined_output,
|
605 |
-
vc_combine,
|
606 |
tts_text,
|
607 |
tts_voice,
|
608 |
record_button
|
609 |
]
|
610 |
)
|
|
|
611 |
gr.Markdown(
|
612 |
"## <center>Credit to:\n"
|
613 |
"#### <center>Original devs:\n"
|
614 |
"<center>the RVC Project, lj1995, zomehwh, sysf\n\n"
|
615 |
"#### <center>Model creators:\n"
|
616 |
-
"<center>
|
617 |
)
|
618 |
if limitation is True:
|
619 |
app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
|
|
|
35 |
#limitation=True
|
36 |
language_dict = tts_order_voice
|
37 |
|
38 |
+
authors = ["dacoolkid44", "Hijack", "Maki Ligon", "megaaziib", "KitLemonfoot", "yeey5", "Sui", "MahdeenSky"]
|
39 |
+
authorskip = ["dacoolkid44 & Hijack", "dacoolkid44 & Hijack & Maki Ligon", "Kit Lemonfoot / NSHFB"]
|
40 |
+
|
41 |
audio_mode = []
|
42 |
f0method_mode = []
|
43 |
if limitation is True:
|
|
|
63 |
def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_voice, f0_up_key, f0_method, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, record_button):
|
64 |
try:
|
65 |
#Setup audio
|
66 |
+
audio=None
|
67 |
if vc_audio_mode == "Input path" or "Youtube" and vc_input != "":
|
68 |
audio, sr = librosa.load(vc_input, sr=16000, mono=True)
|
69 |
+
tts_text = "YouTube Audio"
|
70 |
elif vc_audio_mode == "Upload audio":
|
71 |
if vc_upload is None:
|
72 |
return "Please upload an audio file.", None
|
|
|
86 |
if tts_text is None or tts_voice is None or tts_text=="":
|
87 |
return "You need to enter text and select a voice.", None
|
88 |
voice = language_dict[tts_voice]
|
89 |
+
try:
|
90 |
+
asyncio.run(edge_tts.Communicate(tts_text, voice).save("tts.mp3"))
|
91 |
+
except:
|
92 |
+
print("Failed to get E-TTS handle. A restart may be needed soon.")
|
93 |
+
return "ERROR: Failed to communicate with Edge-TTS. The Edge-TTS service may be down or cannot communicate. Please try another method or try again later.", None
|
94 |
try:
|
95 |
audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
|
96 |
except:
|
|
|
112 |
if sampling_rate != 16000:
|
113 |
audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
|
114 |
tts_text = "Recorded Audio"
|
115 |
+
|
116 |
+
if audio is None:
|
117 |
+
if vc_audio_mode == "Edge-TTS":
|
118 |
+
print("Failed to get E-TTS handle. A restart may be needed soon.")
|
119 |
+
return "ERROR: Failed to obtain a correct response from Edge-TTS. The Edge-TTS service may be down or unable to communicate. Please try another method or try again later.", None
|
120 |
+
return "ERROR: Unknown audio error. Please try again.", None
|
121 |
+
|
122 |
times = [0, 0, 0]
|
123 |
f0_up_key = int(f0_up_key)
|
124 |
|
|
|
203 |
model_index = f"weights/{category_folder}/{character_name}/{info['feature_retrieval_library']}"
|
204 |
if info['feature_retrieval_library'] == "None":
|
205 |
model_index = None
|
206 |
+
if not (model_author in authors or model_author in authorskip):
|
207 |
+
authors.append(model_author)
|
208 |
model_path = f"weights/{category_folder}/{character_name}/{model_name}"
|
209 |
cpt = torch.load(f"weights/{category_folder}/{character_name}/{model_name}", map_location="cpu")
|
210 |
model_version = cpt.get("version", "v1")
|
|
|
300 |
gr.Audio.update(visible=False),
|
301 |
gr.Audio.update(visible=False),
|
302 |
gr.Audio.update(visible=False),
|
|
|
|
|
|
|
303 |
# EdgeTTS
|
304 |
gr.Textbox.update(visible=False),
|
305 |
gr.Dropdown.update(visible=False),
|
|
|
319 |
gr.Audio.update(visible=False),
|
320 |
gr.Audio.update(visible=False),
|
321 |
gr.Audio.update(visible=False),
|
|
|
|
|
|
|
322 |
# EdgeTTS
|
323 |
gr.Textbox.update(visible=False),
|
324 |
gr.Dropdown.update(visible=False),
|
|
|
338 |
gr.Audio.update(visible=True),
|
339 |
gr.Audio.update(visible=True),
|
340 |
gr.Audio.update(visible=True),
|
|
|
|
|
|
|
341 |
# TTS
|
342 |
gr.Textbox.update(visible=False),
|
343 |
gr.Dropdown.update(visible=False),
|
|
|
357 |
gr.Audio.update(visible=False),
|
358 |
gr.Audio.update(visible=False),
|
359 |
gr.Audio.update(visible=False),
|
|
|
|
|
|
|
360 |
# TTS
|
361 |
gr.Textbox.update(visible=True),
|
362 |
gr.Dropdown.update(visible=True),
|
|
|
376 |
gr.Audio.update(visible=False),
|
377 |
gr.Audio.update(visible=False),
|
378 |
gr.Audio.update(visible=False),
|
|
|
|
|
|
|
379 |
# TTS
|
380 |
gr.Textbox.update(visible=False),
|
381 |
gr.Dropdown.update(visible=False),
|
|
|
395 |
gr.Audio.update(visible=False),
|
396 |
gr.Audio.update(visible=False),
|
397 |
gr.Audio.update(visible=False),
|
|
|
|
|
|
|
398 |
# TTS
|
399 |
gr.Textbox.update(visible=False, interactive=True),
|
400 |
gr.Dropdown.update(visible=False, interactive=True),
|
|
|
535 |
],
|
536 |
outputs=[vc_log, vc_output]
|
537 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
538 |
|
539 |
with gr.Row():
|
540 |
with gr.Column():
|
|
|
569 |
inputs=[vc_link, vc_download_audio, vc_split_model],
|
570 |
outputs=[vc_vocal_preview, vc_inst_preview, vc_audio_preview, vc_input]
|
571 |
)
|
|
|
|
|
|
|
|
|
|
|
572 |
vc_audio_mode.change(
|
573 |
fn=change_audio_mode,
|
574 |
inputs=[vc_audio_mode],
|
|
|
582 |
vc_vocal_preview,
|
583 |
vc_inst_preview,
|
584 |
vc_audio_preview,
|
585 |
+
|
|
|
|
|
586 |
tts_text,
|
587 |
tts_voice,
|
588 |
record_button
|
589 |
]
|
590 |
)
|
591 |
+
authStr=", ".join(authors)
|
592 |
gr.Markdown(
|
593 |
"## <center>Credit to:\n"
|
594 |
"#### <center>Original devs:\n"
|
595 |
"<center>the RVC Project, lj1995, zomehwh, sysf\n\n"
|
596 |
"#### <center>Model creators:\n"
|
597 |
+
f"<center>{authStr}\n"
|
598 |
)
|
599 |
if limitation is True:
|
600 |
app.queue(concurrency_count=1, max_size=20, api_open=config.api).launch(share=config.colab)
|