kevinwang676 committed: Update app.py
app.py
CHANGED
@@ -1,7 +1,4 @@
-# adapted for Zero GPU on Hugging Face
-
 import spaces
-
 import os
 import glob
 import json
@@ -18,7 +15,6 @@ import sys
 import io
 import wave
 from datetime import datetime
-#from fairseq import checkpoint_utils
 import urllib.request
 import zipfile
 import shutil
@@ -45,7 +41,7 @@ from vc_infer_pipeline import VC
 from config import Config
 config = Config()
 logging.getLogger("numba").setLevel(logging.WARNING)
-
+spaces = True #os.getenv("SYSTEM") == "spaces"
 force_support = True
 
 audio_mode = []
@@ -218,41 +214,30 @@ pre_fun_hp5 = func(
 
 # Separate vocals
 
-# GPU needed
-#@spaces.GPU(duration=200)
-def get_vocal_gpu(audio_path, split_model, filename):
-    if split_model=="UVR-HP2":
-        pre_fun = pre_fun_hp2
-    else:
-        pre_fun = pre_fun_hp5
-    return pre_fun._path_audio_(audio_path, f"./output/{split_model}/{filename}/", f"./output/{split_model}/{filename}/", "wav")
-
 def youtube_downloader(
-    video_identifier,
     filename,
     split_model,
 ):
-
-    video_info = get_video_info(video_identifier)
-    print(video_info)
-    audio_content = get_response(video_info).content
-    with open(filename.strip() + ".wav", mode="wb") as f:
-        f.write(audio_content)
+
     audio_path = filename.strip() + ".wav"
 
-
+    # make dir output
     os.makedirs("output", exist_ok=True)
 
-
-
+    if split_model=="UVR-HP2":
+        pre_fun = pre_fun_hp2
+    else:
+        pre_fun = pre_fun_hp5
+
+    pre_fun._path_audio_(audio_path, f"./output/{split_model}/{filename}/", f"./output/{split_model}/{filename}/", "wav")
     os.remove(filename.strip()+".wav")
 
     return f"./output/{split_model}/{filename}/vocal_{filename}.wav_10.wav", f"./output/{split_model}/{filename}/instrument_{filename}.wav_10.wav"
 
 # Original code
 
-if force_support is False or
-    if
+if force_support is False or spaces is True:
+    if spaces is True:
         audio_mode = ["Upload audio", "TTS Audio"]
     else:
         audio_mode = ["Input path", "Upload audio", "TTS Audio"]
@@ -293,7 +278,7 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
                 return "You need to upload an audio", None
             sampling_rate, audio = vc_upload
             duration = audio.shape[0] / sampling_rate
-            if duration > 20 and
+            if duration > 20 and spaces:
                 return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
             audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
             if len(audio.shape) > 1:
@@ -348,7 +333,6 @@ def combine_vocal_and_inst(model_name, song_name, song_id, split_model, cover_so
     result = subprocess.run(command.split(), stdout=subprocess.PIPE)
    print(result.stdout.decode())
     return output_path
-
 
 def rvc_models(model_name):
     global vc, net_g, index_files, tgt_sr, version
@@ -362,7 +346,7 @@ def rvc_models(model_name):
         if pth_files == []:
             print(f"Model [{model_count}/{len(w_dirs)}]: No Model file detected, skipping...")
             continue
-        cpt = torch.load(pth_files[0]
+        cpt = torch.load(pth_files[0])
         tgt_sr = cpt["config"][-1]
         cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
         if_f0 = cpt.get("f0", 1)
@@ -402,23 +386,49 @@ def rvc_models(model_name):
 
 singers="您的专属AI歌手阵容:"
 
-#@spaces.GPU(duration=80)
-def infer_gpu(net_g, audio, f0_up_key, index_file, tgt_sr, version, f0_file=None):
-
-    from fairseq import checkpoint_utils
-    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
-        ["hubert_base.pt"],
-        suffix="",
-    )
-    hubert_model = models[0]
-    hubert_model = hubert_model.to(config.device)
-    if config.is_half:
-        hubert_model = hubert_model.half()
-    else:
-        hubert_model = hubert_model.float()
-    hubert_model.eval()
 
-
+
+@spaces.GPU(duration=120)
+def rvc_infer_music_gpu(zip_path, song_name, song_id, split_model, f0_up_key, vocal_volume, inst_volume):
+    from fairseq import checkpoint_utils
+    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
+        ["hubert_base.pt"],
+        suffix="",
+    )
+    hubert_model = models[0]
+    hubert_model = hubert_model.to(config.device)
+    if config.is_half:
+        hubert_model = hubert_model.half()
+    else:
+        hubert_model = hubert_model.float()
+    hubert_model.eval()
+    rvc_models(zip_path)
+
+    if os.path.isdir(f"./output/{split_model}/{song_id}")==True:
+        audio, sr = librosa.load(f"./output/{split_model}/{song_id}/vocal_{song_id}.wav_10.wav", sr=16000, mono=True)
+        song_infer = vc.pipeline(
+            hubert_model,
+            net_g,
+            0,
+            audio,
+            "",
+            [0, 0, 0],
+            f0_up_key,
+            "rmvpe",
+            index_files[0],
+            0.7,
+            1,
+            3,
+            tgt_sr,
+            0,
+            0.25,
+            version,
+            0.33,
+            f0_file=None,
+        )
+    else:
+        audio, sr = librosa.load(youtube_downloader(song_id, split_model)[0], sr=16000, mono=True)
+        song_infer = vc.pipeline(
             hubert_model,
             net_g,
             0,
@@ -427,7 +437,7 @@ def infer_gpu(net_g, audio, f0_up_key, index_file, tgt_sr, version, f0_file=None
             [0, 0, 0],
             f0_up_key,
             "rmvpe",
-
+            index_files[0],
             0.7,
             1,
             3,
@@ -438,8 +448,11 @@ def infer_gpu(net_g, audio, f0_up_key, index_file, tgt_sr, version, f0_file=None
             0.33,
             f0_file=None,
         )
+    sf.write(song_name.strip()+zip_path+"AI翻唱.wav", song_infer, tgt_sr)
+    output_full_song = combine_vocal_and_inst(zip_path, song_name.strip(), song_id, split_model, song_name.strip()+zip_path+"AI翻唱.wav", vocal_volume, inst_volume)
+    os.remove(song_name.strip()+zip_path+"AI翻唱.wav")
+    return output_full_song, singers
 
-@spaces.GPU(duration=400)
 def rvc_infer_music(url, model_name, song_name, split_model, f0_up_key, vocal_volume, inst_volume):
     url = url.strip().replace(" ", "")
     model_name = model_name.strip().replace(" ", "")
@@ -450,30 +463,19 @@ def rvc_infer_music(url, model_name, song_name, split_model, f0_up_key, vocal_vo
     global singers
     if model_name not in singers:
         singers = singers+ ' '+ model_name
-    print("1.开始下载模型")
     download_online_model(url, model_name)
-    rvc_models(zip_path)
     song_name = song_name.strip().replace(" ", "")
     video_identifier = search_bilibili(song_name)
     song_id = get_bilibili_video_id(video_identifier)
-
-
-
-
-
-
-
-    print("2.1.开始去除BGM")
-    audio, sr = librosa.load(youtube_downloader(video_identifier, song_id, split_model)[0], sr=16000, mono=True)
-    print("2.2.开始推理")
-    #song_infer = infer_gpu(hubert_model, net_g, audio, f0_up_key, index_files[0], tgt_sr, version, f0_file=None)
-    song_infer = infer_gpu(net_g, audio, f0_up_key, index_files[0], tgt_sr, version, f0_file=None)
-
-    sf.write(song_name.strip()+zip_path+"AI翻唱.wav", song_infer, tgt_sr)
-    output_full_song = combine_vocal_and_inst(zip_path, song_name.strip(), song_id, split_model, song_name.strip()+zip_path+"AI翻唱.wav", vocal_volume, inst_volume)
-    os.remove(song_name.strip()+zip_path+"AI翻唱.wav")
+    print(video_identifier)
+    video_info = get_video_info(video_identifier)
+    print(video_info)
+    audio_content = get_response(video_info).content
+    with open(song_id.strip() + ".wav", mode="wb") as f:
+        f.write(audio_content)
+    output_full_song, singers = rvc_infer_music_gpu(zip_path, song_name, song_id, split_model, f0_up_key, vocal_volume, inst_volume)
     return output_full_song, singers
-
+
 app = gr.Blocks(theme="JohnSmith9982/small_and_pretty")
 with app:
     with gr.Tab("中文版"):