kevinwang676 committed on
Commit
34c5f01
·
verified ·
1 Parent(s): 1af24ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +68 -66
app.py CHANGED
@@ -1,7 +1,4 @@
1
- # adapted for Zero GPU on Hugging Face
2
-
3
  import spaces
4
-
5
  import os
6
  import glob
7
  import json
@@ -18,7 +15,6 @@ import sys
18
  import io
19
  import wave
20
  from datetime import datetime
21
- #from fairseq import checkpoint_utils
22
  import urllib.request
23
  import zipfile
24
  import shutil
@@ -45,7 +41,7 @@ from vc_infer_pipeline import VC
45
  from config import Config
46
  config = Config()
47
  logging.getLogger("numba").setLevel(logging.WARNING)
48
- spaces_hf = True #os.getenv("SYSTEM") == "spaces"
49
  force_support = True
50
 
51
  audio_mode = []
@@ -218,41 +214,30 @@ pre_fun_hp5 = func(
218
 
219
  # Separate vocals
220
 
221
- # GPU needed
222
- #@spaces.GPU(duration=200)
223
- def get_vocal_gpu(audio_path, split_model, filename):
224
- if split_model=="UVR-HP2":
225
- pre_fun = pre_fun_hp2
226
- else:
227
- pre_fun = pre_fun_hp5
228
- return pre_fun._path_audio_(audio_path, f"./output/{split_model}/{filename}/", f"./output/{split_model}/{filename}/", "wav")
229
-
230
  def youtube_downloader(
231
- video_identifier,
232
  filename,
233
  split_model,
234
  ):
235
- print(video_identifier)
236
- video_info = get_video_info(video_identifier)
237
- print(video_info)
238
- audio_content = get_response(video_info).content
239
- with open(filename.strip() + ".wav", mode="wb") as f:
240
- f.write(audio_content)
241
  audio_path = filename.strip() + ".wav"
242
 
243
- # make dir output
244
  os.makedirs("output", exist_ok=True)
245
 
246
- get_vocal_gpu(audio_path, split_model, filename)
247
- #pre_fun._path_audio_(audio_path, f"./output/{split_model}/{filename}/", f"./output/{split_model}/{filename}/", "wav")
 
 
 
 
248
  os.remove(filename.strip()+".wav")
249
 
250
  return f"./output/{split_model}/{filename}/vocal_{filename}.wav_10.wav", f"./output/{split_model}/{filename}/instrument_{filename}.wav_10.wav"
251
 
252
  # Original code
253
 
254
- if force_support is False or spaces_hf is True:
255
- if spaces_hf is True:
256
  audio_mode = ["Upload audio", "TTS Audio"]
257
  else:
258
  audio_mode = ["Input path", "Upload audio", "TTS Audio"]
@@ -293,7 +278,7 @@ def create_vc_fn(model_name, tgt_sr, net_g, vc, if_f0, version, file_index):
293
  return "You need to upload an audio", None
294
  sampling_rate, audio = vc_upload
295
  duration = audio.shape[0] / sampling_rate
296
- if duration > 20 and spaces_hf:
297
  return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
298
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
299
  if len(audio.shape) > 1:
@@ -348,7 +333,6 @@ def combine_vocal_and_inst(model_name, song_name, song_id, split_model, cover_so
348
  result = subprocess.run(command.split(), stdout=subprocess.PIPE)
349
  print(result.stdout.decode())
350
  return output_path
351
-
352
 
353
  def rvc_models(model_name):
354
  global vc, net_g, index_files, tgt_sr, version
@@ -362,7 +346,7 @@ def rvc_models(model_name):
362
  if pth_files == []:
363
  print(f"Model [{model_count}/{len(w_dirs)}]: No Model file detected, skipping...")
364
  continue
365
- cpt = torch.load(pth_files[0], map_location="cpu")
366
  tgt_sr = cpt["config"][-1]
367
  cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
368
  if_f0 = cpt.get("f0", 1)
@@ -402,23 +386,49 @@ def rvc_models(model_name):
402
 
403
  singers="您的专属AI歌手阵容:"
404
 
405
- #@spaces.GPU(duration=80)
406
- def infer_gpu(net_g, audio, f0_up_key, index_file, tgt_sr, version, f0_file=None):
407
-
408
- from fairseq import checkpoint_utils
409
- models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
410
- ["hubert_base.pt"],
411
- suffix="",
412
- )
413
- hubert_model = models[0]
414
- hubert_model = hubert_model.to(config.device)
415
- if config.is_half:
416
- hubert_model = hubert_model.half()
417
- else:
418
- hubert_model = hubert_model.float()
419
- hubert_model.eval()
420
 
421
- return vc.pipeline(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  hubert_model,
423
  net_g,
424
  0,
@@ -427,7 +437,7 @@ def infer_gpu(net_g, audio, f0_up_key, index_file, tgt_sr, version, f0_file=None
427
  [0, 0, 0],
428
  f0_up_key,
429
  "rmvpe",
430
- index_file,
431
  0.7,
432
  1,
433
  3,
@@ -438,8 +448,11 @@ def infer_gpu(net_g, audio, f0_up_key, index_file, tgt_sr, version, f0_file=None
438
  0.33,
439
  f0_file=None,
440
  )
 
 
 
 
441
 
442
- @spaces.GPU(duration=400)
443
  def rvc_infer_music(url, model_name, song_name, split_model, f0_up_key, vocal_volume, inst_volume):
444
  url = url.strip().replace(" ", "")
445
  model_name = model_name.strip().replace(" ", "")
@@ -450,30 +463,19 @@ def rvc_infer_music(url, model_name, song_name, split_model, f0_up_key, vocal_vo
450
  global singers
451
  if model_name not in singers:
452
  singers = singers+ ' '+ model_name
453
- print("1.开始下载模型")
454
  download_online_model(url, model_name)
455
- rvc_models(zip_path)
456
  song_name = song_name.strip().replace(" ", "")
457
  video_identifier = search_bilibili(song_name)
458
  song_id = get_bilibili_video_id(video_identifier)
459
- if os.path.isdir(f"./output/{split_model}/{song_id}")==True:
460
- print("2.直接开始推理")
461
- audio, sr = librosa.load(f"./output/{split_model}/{song_id}/vocal_{song_id}.wav_10.wav", sr=16000, mono=True)
462
- #song_infer = infer_gpu(hubert_model, net_g, audio, f0_up_key, index_files[0], tgt_sr, version, f0_file=None)
463
- song_infer = infer_gpu(net_g, audio, f0_up_key, index_files[0], tgt_sr, version, f0_file=None)
464
-
465
- else:
466
- print("2.1.开始去除BGM")
467
- audio, sr = librosa.load(youtube_downloader(video_identifier, song_id, split_model)[0], sr=16000, mono=True)
468
- print("2.2.开始推理")
469
- #song_infer = infer_gpu(hubert_model, net_g, audio, f0_up_key, index_files[0], tgt_sr, version, f0_file=None)
470
- song_infer = infer_gpu(net_g, audio, f0_up_key, index_files[0], tgt_sr, version, f0_file=None)
471
-
472
- sf.write(song_name.strip()+zip_path+"AI翻唱.wav", song_infer, tgt_sr)
473
- output_full_song = combine_vocal_and_inst(zip_path, song_name.strip(), song_id, split_model, song_name.strip()+zip_path+"AI翻唱.wav", vocal_volume, inst_volume)
474
- os.remove(song_name.strip()+zip_path+"AI翻唱.wav")
475
  return output_full_song, singers
476
-
477
  app = gr.Blocks(theme="JohnSmith9982/small_and_pretty")
478
  with app:
479
  with gr.Tab("中文版"):
 
 
 
1
  import spaces
 
2
  import os
3
  import glob
4
  import json
 
15
  import io
16
  import wave
17
  from datetime import datetime
 
18
  import urllib.request
19
  import zipfile
20
  import shutil
 
41
  from config import Config
42
  config = Config()
43
  logging.getLogger("numba").setLevel(logging.WARNING)
44
+ spaces = True #os.getenv("SYSTEM") == "spaces"
45
  force_support = True
46
 
47
  audio_mode = []
 
214
 
215
  # Separate vocals
216
 
 
 
 
 
 
 
 
 
 
217
  def youtube_downloader(
 
218
  filename,
219
  split_model,
220
  ):
221
+
 
 
 
 
 
222
  audio_path = filename.strip() + ".wav"
223
 
224
+ # make dir output
225
  os.makedirs("output", exist_ok=True)
226
 
227
+ if split_model=="UVR-HP2":
228
+ pre_fun = pre_fun_hp2
229
+ else:
230
+ pre_fun = pre_fun_hp5
231
+
232
+ pre_fun._path_audio_(audio_path, f"./output/{split_model}/{filename}/", f"./output/{split_model}/{filename}/", "wav")
233
  os.remove(filename.strip()+".wav")
234
 
235
  return f"./output/{split_model}/{filename}/vocal_{filename}.wav_10.wav", f"./output/{split_model}/{filename}/instrument_{filename}.wav_10.wav"
236
 
237
  # Original code
238
 
239
+ if force_support is False or spaces is True:
240
+ if spaces is True:
241
  audio_mode = ["Upload audio", "TTS Audio"]
242
  else:
243
  audio_mode = ["Input path", "Upload audio", "TTS Audio"]
 
278
  return "You need to upload an audio", None
279
  sampling_rate, audio = vc_upload
280
  duration = audio.shape[0] / sampling_rate
281
+ if duration > 20 and spaces:
282
  return "Please upload an audio file that is less than 20 seconds. If you need to generate a longer audio file, please use Colab.", None
283
  audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
284
  if len(audio.shape) > 1:
 
333
  result = subprocess.run(command.split(), stdout=subprocess.PIPE)
334
  print(result.stdout.decode())
335
  return output_path
 
336
 
337
  def rvc_models(model_name):
338
  global vc, net_g, index_files, tgt_sr, version
 
346
  if pth_files == []:
347
  print(f"Model [{model_count}/{len(w_dirs)}]: No Model file detected, skipping...")
348
  continue
349
+ cpt = torch.load(pth_files[0])
350
  tgt_sr = cpt["config"][-1]
351
  cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk
352
  if_f0 = cpt.get("f0", 1)
 
386
 
387
  singers="您的专属AI歌手阵容:"
388
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
+
391
+ @spaces.GPU(duration=120)
392
+ def rvc_infer_music_gpu(zip_path, song_name, song_id, split_model, f0_up_key, vocal_volume, inst_volume):
393
+ from fairseq import checkpoint_utils
394
+ models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
395
+ ["hubert_base.pt"],
396
+ suffix="",
397
+ )
398
+ hubert_model = models[0]
399
+ hubert_model = hubert_model.to(config.device)
400
+ if config.is_half:
401
+ hubert_model = hubert_model.half()
402
+ else:
403
+ hubert_model = hubert_model.float()
404
+ hubert_model.eval()
405
+ rvc_models(zip_path)
406
+
407
+ if os.path.isdir(f"./output/{split_model}/{song_id}")==True:
408
+ audio, sr = librosa.load(f"./output/{split_model}/{song_id}/vocal_{song_id}.wav_10.wav", sr=16000, mono=True)
409
+ song_infer = vc.pipeline(
410
+ hubert_model,
411
+ net_g,
412
+ 0,
413
+ audio,
414
+ "",
415
+ [0, 0, 0],
416
+ f0_up_key,
417
+ "rmvpe",
418
+ index_files[0],
419
+ 0.7,
420
+ 1,
421
+ 3,
422
+ tgt_sr,
423
+ 0,
424
+ 0.25,
425
+ version,
426
+ 0.33,
427
+ f0_file=None,
428
+ )
429
+ else:
430
+ audio, sr = librosa.load(youtube_downloader(song_id, split_model)[0], sr=16000, mono=True)
431
+ song_infer = vc.pipeline(
432
  hubert_model,
433
  net_g,
434
  0,
 
437
  [0, 0, 0],
438
  f0_up_key,
439
  "rmvpe",
440
+ index_files[0],
441
  0.7,
442
  1,
443
  3,
 
448
  0.33,
449
  f0_file=None,
450
  )
451
+ sf.write(song_name.strip()+zip_path+"AI翻唱.wav", song_infer, tgt_sr)
452
+ output_full_song = combine_vocal_and_inst(zip_path, song_name.strip(), song_id, split_model, song_name.strip()+zip_path+"AI翻唱.wav", vocal_volume, inst_volume)
453
+ os.remove(song_name.strip()+zip_path+"AI翻唱.wav")
454
+ return output_full_song, singers
455
 
 
456
  def rvc_infer_music(url, model_name, song_name, split_model, f0_up_key, vocal_volume, inst_volume):
457
  url = url.strip().replace(" ", "")
458
  model_name = model_name.strip().replace(" ", "")
 
463
  global singers
464
  if model_name not in singers:
465
  singers = singers+ ' '+ model_name
 
466
  download_online_model(url, model_name)
 
467
  song_name = song_name.strip().replace(" ", "")
468
  video_identifier = search_bilibili(song_name)
469
  song_id = get_bilibili_video_id(video_identifier)
470
+ print(video_identifier)
471
+ video_info = get_video_info(video_identifier)
472
+ print(video_info)
473
+ audio_content = get_response(video_info).content
474
+ with open(song_id.strip() + ".wav", mode="wb") as f:
475
+ f.write(audio_content)
476
+ output_full_song, singers = rvc_infer_music_gpu(zip_path, song_name, song_id, split_model, f0_up_key, vocal_volume, inst_volume)
 
 
 
 
 
 
 
 
 
477
  return output_full_song, singers
478
+
479
  app = gr.Blocks(theme="JohnSmith9982/small_and_pretty")
480
  with app:
481
  with gr.Tab("中文版"):