import subprocess, torch, os, traceback, sys, warnings, shutil, numpy as np from mega import Mega os.environ["no_proxy"] = "localhost, 127.0.0.1, ::1" import threading from time import sleep from subprocess import Popen from typing import Any import faiss import spaces from random import shuffle import json, datetime, requests from gtts import gTTS now_dir = os.getcwd() sys.path.append(now_dir) tmp = os.path.join(now_dir, "TEMP") shutil.rmtree(tmp, ignore_errors=True) shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % (now_dir), ignore_errors=True) os.makedirs(tmp, exist_ok=True) os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True) os.makedirs(os.path.join(now_dir, "weights"), exist_ok=True) os.environ["TEMP"] = tmp warnings.filterwarnings("ignore") torch.manual_seed(114514) from i18n import I18nAuto import edge_tts, asyncio from ilariatts import tts_order_voice language_dict = tts_order_voice ilariavoices = language_dict.keys() import signal import math from utils import load_audio, CSVutil global DoFormant, Quefrency, Timbre if not os.path.isdir('csvdb/'): os.makedirs('csvdb') frmnt, stp = open("csvdb/formanting.csv", 'w'), open("csvdb/stop.csv", 'w') frmnt.close() stp.close() try: DoFormant, Quefrency, Timbre = CSVutil('csvdb/formanting.csv', 'r', 'formanting') DoFormant = ( lambda DoFormant: True if DoFormant.lower() == 'true' else (False if DoFormant.lower() == 'false' else DoFormant) )(DoFormant) except (ValueError, TypeError, IndexError): DoFormant, Quefrency, Timbre = False, 1.0, 1.0 CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, Quefrency, Timbre) def download_models(): # Download hubert base model if not present if not os.path.isfile('./hubert_base.pt'): response = requests.get('https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt') if response.status_code == 200: with open('./hubert_base.pt', 'wb') as f: f.write(response.content) print("Downloaded hubert base model file successfully. 
File saved to ./hubert_base.pt.") else: raise Exception("Failed to download hubert base model file. Status code: " + str(response.status_code) + ".") # Download rmvpe model if not present if not os.path.isfile('./rmvpe.pt'): response = requests.get('https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/rmvpe.pt?download=true') if response.status_code == 200: with open('./rmvpe.pt', 'wb') as f: f.write(response.content) print("Downloaded rmvpe model file successfully. File saved to ./rmvpe.pt.") else: raise Exception("Failed to download rmvpe model file. Status code: " + str(response.status_code) + ".") download_models() print("\n-------------------------------\nRVC v2 Easy GUI (Local Edition)\n-------------------------------\n") def formant_apply(qfrency, tmbre): Quefrency = qfrency Timbre = tmbre DoFormant = True CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre) return ({"value": Quefrency, "__type__": "update"}, {"value": Timbre, "__type__": "update"}) def get_fshift_presets(): fshift_presets_list = [] for dirpath, _, filenames in os.walk("./formantshiftcfg/"): for filename in filenames: if filename.endswith(".txt"): fshift_presets_list.append(os.path.join(dirpath,filename).replace('\\','/')) if len(fshift_presets_list) > 0: return fshift_presets_list else: return '' def formant_enabled(cbox, qfrency, tmbre, frmntapply, formantpreset, formant_refresh_button): if (cbox): DoFormant = True CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre) #print(f"is checked? - {cbox}\ngot {DoFormant}") return ( {"value": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, {"visible": True, "__type__": "update"}, ) else: DoFormant = False CSVutil('csvdb/formanting.csv', 'w+', 'formanting', DoFormant, qfrency, tmbre) #print(f"is checked? 
- {cbox}\ngot {DoFormant}") return ( {"value": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, {"visible": False, "__type__": "update"}, ) def preset_apply(preset, qfer, tmbr): if str(preset) != '': with open(str(preset), 'r') as p: content = p.readlines() qfer, tmbr = content[0].split('\n')[0], content[1] formant_apply(qfer, tmbr) else: pass return ({"value": qfer, "__type__": "update"}, {"value": tmbr, "__type__": "update"}) def update_fshift_presets(preset, qfrency, tmbre): qfrency, tmbre = preset_apply(preset, qfrency, tmbre) if (str(preset) != ''): with open(str(preset), 'r') as p: content = p.readlines() qfrency, tmbre = content[0].split('\n')[0], content[1] formant_apply(qfrency, tmbre) else: pass return ( {"choices": get_fshift_presets(), "__type__": "update"}, {"value": qfrency, "__type__": "update"}, {"value": tmbre, "__type__": "update"}, ) i18n = I18nAuto() #i18n.print() # 判断是否有能用来训练和加速推理的N卡 ngpu = torch.cuda.device_count() gpu_infos = [] mem = [] if (not torch.cuda.is_available()) or ngpu == 0: if_gpu_ok = False else: if_gpu_ok = False for i in range(ngpu): gpu_name = torch.cuda.get_device_name(i) if ( "10" in gpu_name or "16" in gpu_name or "20" in gpu_name or "30" in gpu_name or "40" in gpu_name or "A2" in gpu_name.upper() or "A3" in gpu_name.upper() or "A4" in gpu_name.upper() or "P4" in gpu_name.upper() or "A50" in gpu_name.upper() or "A60" in gpu_name.upper() or "70" in gpu_name or "80" in gpu_name or "90" in gpu_name or "M4" in gpu_name.upper() or "T4" in gpu_name.upper() or "TITAN" in gpu_name.upper() or "ZERO" in gpu_name.upper() ): # A10#A100#V100#A40#P40#M40#K80#A4500 if_gpu_ok = True # 至少有一张能用的N卡 gpu_infos.append("%s\t%s" % (i, gpu_name)) mem.append( int( torch.cuda.get_device_properties(i).total_memory / 1024 / 1024 / 1024 + 0.4 ) ) if if_gpu_ok == 
True and len(gpu_infos) > 0: gpu_info = "\n".join(gpu_infos) default_batch_size = min(mem) // 2 else: gpu_info = i18n("很遗憾您这没有能用的显卡来支持您训练") default_batch_size = 1 gpus = "-".join([i[0] for i in gpu_infos]) from lib.infer_pack.models import ( SynthesizerTrnMs256NSFsid, SynthesizerTrnMs256NSFsid_nono, SynthesizerTrnMs768NSFsid, SynthesizerTrnMs768NSFsid_nono, ) import soundfile as sf from fairseq import checkpoint_utils import gradio as gr import logging from vc_infer_pipeline import VC from config import Config config = Config() # from trainset_preprocess_pipeline import PreProcess logging.getLogger("numba").setLevel(logging.WARNING) hubert_model = None def load_hubert(): global hubert_model models, _, _ = checkpoint_utils.load_model_ensemble_and_task( ["hubert_base.pt"], suffix="", ) hubert_model = models[0] hubert_model = hubert_model.to(config.device) if config.is_half: hubert_model = hubert_model.half() else: hubert_model = hubert_model.float() hubert_model.eval() weight_root = "weights" index_root = "logs" names = [] for name in os.listdir(weight_root): if name.endswith(".pth"): names.append(name) index_paths = [] for root, dirs, files in os.walk(index_root, topdown=False): for name in files: if name.endswith(".index") and "trained" not in name: index_paths.append("%s/%s" % (root, name)) @spaces.GPU def vc_single( sid, input_audio_path, f0_up_key, f0_file, f0_method, file_index, #file_index2, # file_big_npy, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, crepe_hop_length, ): # spk_item, input_audio0, vc_transform0,f0_file,f0method0 global tgt_sr, net_g, vc, hubert_model, version if input_audio_path is None: return "You need to upload an audio", None f0_up_key = int(f0_up_key) try: audio = load_audio(input_audio_path, 16000, DoFormant, Quefrency, Timbre) audio_max = np.abs(audio).max() / 0.95 if audio_max > 1: audio /= audio_max times = [0, 0, 0] if hubert_model == None: load_hubert() if_f0 = cpt.get("f0", 1) file_index = ( ( file_index.strip(" 
") .strip('"') .strip("\n") .strip('"') .strip(" ") .replace("trained", "added") ) ) # 防止小白写错,自动帮他替换掉 # file_big_npy = ( # file_big_npy.strip(" ").strip('"').strip("\n").strip('"').strip(" ") # ) audio_opt = vc.pipeline( hubert_model, net_g, sid, audio, input_audio_path, times, f0_up_key, f0_method, file_index, # file_big_npy, index_rate, if_f0, filter_radius, tgt_sr, resample_sr, rms_mix_rate, version, protect, crepe_hop_length, f0_file=f0_file, ) if resample_sr >= 16000 and tgt_sr != resample_sr: tgt_sr = resample_sr index_info = ( "Using index:%s." % file_index if os.path.exists(file_index) else "Index not used." ) return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % ( index_info, times[0], times[1], times[2], ), (tgt_sr, audio_opt) except: info = traceback.format_exc() print(info) return info, (None, None) @spaces.GPU(duration=60 * 2) def vc_multi( sid, dir_path, opt_root, paths, f0_up_key, f0_method, file_index, file_index2, # file_big_npy, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, format1, crepe_hop_length, ): try: dir_path = ( dir_path.strip(" ").strip('"').strip("\n").strip('"').strip(" ") ) # 防止小白拷路径头尾带了空格和"和回车 opt_root = opt_root.strip(" ").strip('"').strip("\n").strip('"').strip(" ") os.makedirs(opt_root, exist_ok=True) try: if dir_path != "": paths = [os.path.join(dir_path, name) for name in os.listdir(dir_path)] else: paths = [path.name for path in paths] except: traceback.print_exc() paths = [path.name for path in paths] infos = [] for path in paths: info, opt = vc_single( sid, path, f0_up_key, None, f0_method, file_index, # file_big_npy, index_rate, filter_radius, resample_sr, rms_mix_rate, protect, crepe_hop_length ) if "Success" in info: try: tgt_sr, audio_opt = opt if format1 in ["wav", "flac"]: sf.write( "%s/%s.%s" % (opt_root, os.path.basename(path), format1), audio_opt, tgt_sr, ) else: path = "%s/%s.wav" % (opt_root, os.path.basename(path)) sf.write( path, audio_opt, tgt_sr, ) if os.path.exists(path): os.system( 
"ffmpeg -i %s -vn %s -q:a 2 -y" % (path, path[:-4] + ".%s" % format1) ) except: info += traceback.format_exc() infos.append("%s->%s" % (os.path.basename(path), info)) yield "\n".join(infos) yield "\n".join(infos) except: yield traceback.format_exc() # 一个选项卡全局只能有一个音色 def get_vc(sid): global n_spk, tgt_sr, net_g, vc, cpt, version if sid == "" or sid == []: global hubert_model if hubert_model != None: # 考虑到轮询, 需要加个判断看是否 sid 是由有模型切换到无模型的 print("clean_empty_cache") del net_g, n_spk, vc, hubert_model, tgt_sr # ,cpt hubert_model = net_g = n_spk = vc = hubert_model = tgt_sr = None if torch.cuda.is_available(): torch.cuda.empty_cache() ###楼下不这么折腾清理不干净 if_f0 = cpt.get("f0", 1) version = cpt.get("version", "v1") if version == "v1": if if_f0 == 1: net_g = SynthesizerTrnMs256NSFsid( *cpt["config"], is_half=config.is_half ) else: net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) elif version == "v2": if if_f0 == 1: net_g = SynthesizerTrnMs768NSFsid( *cpt["config"], is_half=config.is_half ) else: net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) del net_g, cpt if torch.cuda.is_available(): torch.cuda.empty_cache() cpt = None return {"visible": False, "__type__": "update"} person = "%s/%s" % (weight_root, sid) print("loading %s" % person) cpt = torch.load(person, map_location="cpu") tgt_sr = cpt["config"][-1] cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0] # n_spk if_f0 = cpt.get("f0", 1) version = cpt.get("version", "v1") if version == "v1": if if_f0 == 1: net_g = SynthesizerTrnMs256NSFsid(*cpt["config"], is_half=config.is_half) else: net_g = SynthesizerTrnMs256NSFsid_nono(*cpt["config"]) elif version == "v2": if if_f0 == 1: net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=config.is_half) else: net_g = SynthesizerTrnMs768NSFsid_nono(*cpt["config"]) del net_g.enc_q print(net_g.load_state_dict(cpt["weight"], strict=False)) net_g.eval().to(config.device) if config.is_half: net_g = net_g.half() else: net_g = net_g.float() vc = VC(tgt_sr, config) n_spk = 
cpt["config"][-3] return {"visible": False, "maximum": n_spk, "__type__": "update"} def change_choices(): names = [] for name in os.listdir(weight_root): if name.endswith(".pth"): names.append(name) index_paths = [] for root, dirs, files in os.walk(index_root, topdown=False): for name in files: if name.endswith(".index") and "trained" not in name: index_paths.append("%s/%s" % (root, name)) return {"choices": sorted(names), "__type__": "update"}, { "choices": sorted(index_paths), "__type__": "update", } def clean(): return {"value": "", "__type__": "update"} sr_dict = { "32k": 32000, "40k": 40000, "48k": 48000, } def if_done(done, p): while 1: if p.poll() == None: sleep(0.5) else: break done[0] = True def if_done_multi(done, ps): while 1: # poll==None代表进程未结束 # 只要有一个进程未结束都不停 flag = 1 for p in ps: if p.poll() == None: flag = 0 sleep(0.5) break if flag == 1: break done[0] = True def preprocess_dataset(trainset_dir, exp_dir, sr, n_p): sr = sr_dict[sr] os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) f = open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "w") f.close() cmd = ( config.python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s/logs/%s " % (trainset_dir, sr, n_p, now_dir, exp_dir) + str(config.noparallel) ) print(cmd) p = Popen(cmd, shell=True) # , stdin=PIPE, stdout=PIPE,stderr=PIPE,cwd=now_dir ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 done = [False] threading.Thread( target=if_done, args=( done, p, ), ).start() while 1: with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: yield (f.read()) sleep(1) if done[0] == True: break with open("%s/logs/%s/preprocess.log" % (now_dir, exp_dir), "r") as f: log = f.read() print(log) yield log # but2.click(extract_f0,[gpus6,np7,f0method8,if_f0_3,trainset_dir4],[info2]) def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, echl): gpus = gpus.split("-") os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True) f = 
open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "w") f.close() if if_f0: cmd = config.python_cmd + " extract_f0_print.py %s/logs/%s %s %s %s" % ( now_dir, exp_dir, n_p, f0method, echl, ) print(cmd) p = Popen(cmd, shell=True, cwd=now_dir) # , stdin=PIPE, stdout=PIPE,stderr=PIPE ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 done = [False] threading.Thread( target=if_done, args=( done, p, ), ).start() while 1: with open( "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r" ) as f: yield (f.read()) sleep(1) if done[0] == True: break with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: log = f.read() print(log) yield log ####对不同part分别开多进程 """ n_part=int(sys.argv[1]) i_part=int(sys.argv[2]) i_gpu=sys.argv[3] exp_dir=sys.argv[4] os.environ["CUDA_VISIBLE_DEVICES"]=str(i_gpu) """ leng = len(gpus) ps = [] for idx, n_g in enumerate(gpus): cmd = ( config.python_cmd + " extract_feature_print.py %s %s %s %s %s/logs/%s %s" % ( config.device, leng, idx, n_g, now_dir, exp_dir, version19, ) ) print(cmd) p = Popen( cmd, shell=True, cwd=now_dir ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir ps.append(p) ###煞笔gr, popen read都非得全跑完了再一次性读取, 不用gr就正常读一句输出一句;只能额外弄出一个文本流定时读 done = [False] threading.Thread( target=if_done_multi, args=( done, ps, ), ).start() while 1: with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: yield (f.read()) sleep(1) if done[0] == True: break with open("%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir), "r") as f: log = f.read() print(log) yield log def change_sr2(sr2, if_f0_3, version19): path_str = "" if version19 == "v1" else "_v2" f0_str = "f0" if if_f0_3 else "" if_pretrained_generator_exist = os.access("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK) if_pretrained_discriminator_exist = os.access("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK) if (if_pretrained_generator_exist == False): print("pretrained%s/%sG%s.pth" 
% (path_str, f0_str, sr2), "not exist, will not use pretrained model") if (if_pretrained_discriminator_exist == False): print("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") return ( ("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_generator_exist else "", ("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_discriminator_exist else "", {"visible": True, "__type__": "update"} ) def change_version19(sr2, if_f0_3, version19): path_str = "" if version19 == "v1" else "_v2" f0_str = "f0" if if_f0_3 else "" if_pretrained_generator_exist = os.access("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), os.F_OK) if_pretrained_discriminator_exist = os.access("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), os.F_OK) if (if_pretrained_generator_exist == False): print("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") if (if_pretrained_discriminator_exist == False): print("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2), "not exist, will not use pretrained model") return ( ("pretrained%s/%sG%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_generator_exist else "", ("pretrained%s/%sD%s.pth" % (path_str, f0_str, sr2)) if if_pretrained_discriminator_exist else "", ) def change_f0(if_f0_3, sr2, version19): # f0method8,pretrained_G14,pretrained_D15 path_str = "" if version19 == "v1" else "_v2" if_pretrained_generator_exist = os.access("pretrained%s/f0G%s.pth" % (path_str, sr2), os.F_OK) if_pretrained_discriminator_exist = os.access("pretrained%s/f0D%s.pth" % (path_str, sr2), os.F_OK) if (if_pretrained_generator_exist == False): print("pretrained%s/f0G%s.pth" % (path_str, sr2), "not exist, will not use pretrained model") if (if_pretrained_discriminator_exist == False): print("pretrained%s/f0D%s.pth" % (path_str, sr2), "not exist, will not use pretrained model") if if_f0_3: return ( {"visible": True, "__type__": "update"}, "pretrained%s/f0G%s.pth" % 
(path_str, sr2) if if_pretrained_generator_exist else "", "pretrained%s/f0D%s.pth" % (path_str, sr2) if if_pretrained_discriminator_exist else "", ) return ( {"visible": False, "__type__": "update"}, ("pretrained%s/G%s.pth" % (path_str, sr2)) if if_pretrained_generator_exist else "", ("pretrained%s/D%s.pth" % (path_str, sr2)) if if_pretrained_discriminator_exist else "", ) global log_interval def set_log_interval(exp_dir, batch_size12): log_interval = 1 folder_path = os.path.join(exp_dir, "1_16k_wavs") if os.path.exists(folder_path) and os.path.isdir(folder_path): wav_files = [f for f in os.listdir(folder_path) if f.endswith(".wav")] if wav_files: sample_size = len(wav_files) log_interval = math.ceil(sample_size / batch_size12) if log_interval > 1: log_interval += 1 return log_interval # but3.click(click_train,[exp_dir1,sr2,if_f0_3,save_epoch10,total_epoch11,batch_size12,if_save_latest13,pretrained_G14,pretrained_D15,gpus16]) def click_train( exp_dir1, sr2, if_f0_3, spk_id5, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17, if_save_every_weights18, version19, ): CSVutil('csvdb/stop.csv', 'w+', 'formanting', False) # 生成filelist exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) os.makedirs(exp_dir, exist_ok=True) gt_wavs_dir = "%s/0_gt_wavs" % (exp_dir) feature_dir = ( "%s/3_feature256" % (exp_dir) if version19 == "v1" else "%s/3_feature768" % (exp_dir) ) log_interval = set_log_interval(exp_dir, batch_size12) if if_f0_3: f0_dir = "%s/2a_f0" % (exp_dir) f0nsf_dir = "%s/2b-f0nsf" % (exp_dir) names = ( set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set([name.split(".")[0] for name in os.listdir(feature_dir)]) & set([name.split(".")[0] for name in os.listdir(f0_dir)]) & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) ) else: names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( [name.split(".")[0] for name in os.listdir(feature_dir)] ) opt = [] for name in 
names: if if_f0_3: opt.append( "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" % ( gt_wavs_dir.replace("\\", "\\\\"), name, feature_dir.replace("\\", "\\\\"), name, f0_dir.replace("\\", "\\\\"), name, f0nsf_dir.replace("\\", "\\\\"), name, spk_id5, ) ) else: opt.append( "%s/%s.wav|%s/%s.npy|%s" % ( gt_wavs_dir.replace("\\", "\\\\"), name, feature_dir.replace("\\", "\\\\"), name, spk_id5, ) ) fea_dim = 256 if version19 == "v1" else 768 if if_f0_3: for _ in range(2): opt.append( "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) ) else: for _ in range(2): opt.append( "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" % (now_dir, sr2, now_dir, fea_dim, spk_id5) ) shuffle(opt) with open("%s/filelist.txt" % exp_dir, "w") as f: f.write("\n".join(opt)) print("write filelist done") # 生成config#无需生成config # cmd = python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e mi-test -sr 40k -f0 1 -bs 4 -g 0 -te 10 -se 5 -pg pretrained/f0G40k.pth -pd pretrained/f0D40k.pth -l 1 -c 0" print("use gpus:", gpus16) if pretrained_G14 == "": print("no pretrained Generator") if pretrained_D15 == "": print("no pretrained Discriminator") if gpus16: cmd = ( config.python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s -li %s" % ( exp_dir1, sr2, 1 if if_f0_3 else 0, batch_size12, gpus16, total_epoch11, save_epoch10, ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "", ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "", 1 if if_save_latest13 == True else 0, 1 if if_cache_gpu17 == True else 0, 1 if if_save_every_weights18 == True else 0, version19, log_interval, ) ) else: cmd = ( config.python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s -li %s" % ( 
exp_dir1, sr2, 1 if if_f0_3 else 0, batch_size12, total_epoch11, save_epoch10, ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "\b", ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "\b", 1 if if_save_latest13 == True else 0, 1 if if_cache_gpu17 == True else 0, 1 if if_save_every_weights18 == True else 0, version19, log_interval, ) ) print(cmd) p = Popen(cmd, shell=True, cwd=now_dir) global PID PID = p.pid p.wait() return ("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log", {"visible": False, "__type__": "update"}, {"visible": True, "__type__": "update"}) # but4.click(train_index, [exp_dir1], info3) def train_index(exp_dir1, version19): exp_dir = "%s/logs/%s" % (now_dir, exp_dir1) os.makedirs(exp_dir, exist_ok=True) feature_dir = ( "%s/3_feature256" % (exp_dir) if version19 == "v1" else "%s/3_feature768" % (exp_dir) ) if os.path.exists(feature_dir) == False: return "请先进行特征提取!" listdir_res = list(os.listdir(feature_dir)) if len(listdir_res) == 0: return "请先进行特征提取!" npys = [] for name in sorted(listdir_res): phone = np.load("%s/%s" % (feature_dir, name)) npys.append(phone) big_npy = np.concatenate(npys, 0) big_npy_idx = np.arange(big_npy.shape[0]) np.random.shuffle(big_npy_idx) big_npy = big_npy[big_npy_idx] np.save("%s/total_fea.npy" % exp_dir, big_npy) # n_ivf = big_npy.shape[0] // 39 n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) infos = [] infos.append("%s,%s" % (big_npy.shape, n_ivf)) yield "\n".join(infos) index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) # index = faiss.index_factory(256if version19=="v1"else 768, "IVF%s,PQ128x4fs,RFlat"%n_ivf) infos.append("training") yield "\n".join(infos) index_ivf = faiss.extract_index_ivf(index) # index_ivf.nprobe = 1 index.train(big_npy) faiss.write_index( index, "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), ) # faiss.write_index(index, 
'%s/trained_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) infos.append("adding") yield "\n".join(infos) batch_size_add = 8192 for i in range(0, big_npy.shape[0], batch_size_add): index.add(big_npy[i : i + batch_size_add]) faiss.write_index( index, "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), ) infos.append( "成功构建索引,added_IVF%s_Flat_nprobe_%s_%s_%s.index" % (n_ivf, index_ivf.nprobe, exp_dir1, version19) ) # faiss.write_index(index, '%s/added_IVF%s_Flat_FastScan_%s.index'%(exp_dir,n_ivf,version19)) # infos.append("成功构建索引,added_IVF%s_Flat_FastScan_%s.index"%(n_ivf,version19)) yield "\n".join(infos) # but5.click(train1key, [exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, gpus6, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17], info3) def train1key( exp_dir1, sr2, if_f0_3, trainset_dir4, spk_id5, np7, f0method8, save_epoch10, total_epoch11, batch_size12, if_save_latest13, pretrained_G14, pretrained_D15, gpus16, if_cache_gpu17, if_save_every_weights18, version19, echl ): infos = [] def get_info_str(strr): infos.append(strr) return "\n".join(infos) model_log_dir = "%s/logs/%s" % (now_dir, exp_dir1) preprocess_log_path = "%s/preprocess.log" % model_log_dir extract_f0_feature_log_path = "%s/extract_f0_feature.log" % model_log_dir gt_wavs_dir = "%s/0_gt_wavs" % model_log_dir feature_dir = ( "%s/3_feature256" % model_log_dir if version19 == "v1" else "%s/3_feature768" % model_log_dir ) os.makedirs(model_log_dir, exist_ok=True) #########step1:处理数据 open(preprocess_log_path, "w").close() cmd = ( config.python_cmd + " trainset_preprocess_pipeline_print.py %s %s %s %s " % (trainset_dir4, sr_dict[sr2], np7, model_log_dir) + str(config.noparallel) ) yield get_info_str(i18n("step1:正在处理数据")) yield get_info_str(cmd) p = Popen(cmd, shell=True) p.wait() with open(preprocess_log_path, "r") as f: print(f.read()) #########step2a:提取音高 
open(extract_f0_feature_log_path, "w") if if_f0_3: yield get_info_str("step2a:正在提取音高") cmd = config.python_cmd + " extract_f0_print.py %s %s %s %s" % ( model_log_dir, np7, f0method8, echl ) yield get_info_str(cmd) p = Popen(cmd, shell=True, cwd=now_dir) p.wait() with open(extract_f0_feature_log_path, "r") as f: print(f.read()) else: yield get_info_str(i18n("step2a:无需提取音高")) #######step2b:提取特征 yield get_info_str(i18n("step2b:正在提取特征")) gpus = gpus16.split("-") leng = len(gpus) ps = [] for idx, n_g in enumerate(gpus): cmd = config.python_cmd + " extract_feature_print.py %s %s %s %s %s %s" % ( config.device, leng, idx, n_g, model_log_dir, version19, ) yield get_info_str(cmd) p = Popen( cmd, shell=True, cwd=now_dir ) # , shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=now_dir ps.append(p) for p in ps: p.wait() with open(extract_f0_feature_log_path, "r") as f: print(f.read()) #######step3a:训练模型 yield get_info_str(i18n("step3a:正在训练模型")) # 生成filelist if if_f0_3: f0_dir = "%s/2a_f0" % model_log_dir f0nsf_dir = "%s/2b-f0nsf" % model_log_dir names = ( set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set([name.split(".")[0] for name in os.listdir(feature_dir)]) & set([name.split(".")[0] for name in os.listdir(f0_dir)]) & set([name.split(".")[0] for name in os.listdir(f0nsf_dir)]) ) else: names = set([name.split(".")[0] for name in os.listdir(gt_wavs_dir)]) & set( [name.split(".")[0] for name in os.listdir(feature_dir)] ) opt = [] for name in names: if if_f0_3: opt.append( "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s" % ( gt_wavs_dir.replace("\\", "\\\\"), name, feature_dir.replace("\\", "\\\\"), name, f0_dir.replace("\\", "\\\\"), name, f0nsf_dir.replace("\\", "\\\\"), name, spk_id5, ) ) else: opt.append( "%s/%s.wav|%s/%s.npy|%s" % ( gt_wavs_dir.replace("\\", "\\\\"), name, feature_dir.replace("\\", "\\\\"), name, spk_id5, ) ) fea_dim = 256 if version19 == "v1" else 768 if if_f0_3: for _ in range(2): opt.append( 
"%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s" % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5) ) else: for _ in range(2): opt.append( "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s" % (now_dir, sr2, now_dir, fea_dim, spk_id5) ) shuffle(opt) with open("%s/filelist.txt" % model_log_dir, "w") as f: f.write("\n".join(opt)) yield get_info_str("write filelist done") if gpus16: cmd = ( config.python_cmd +" train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" % ( exp_dir1, sr2, 1 if if_f0_3 else 0, batch_size12, gpus16, total_epoch11, save_epoch10, ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "", ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "", 1 if if_save_latest13 == True else 0, 1 if if_cache_gpu17 == True else 0, 1 if if_save_every_weights18 == True else 0, version19, ) ) else: cmd = ( config.python_cmd + " train_nsf_sim_cache_sid_load_pretrain.py -e %s -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw %s -v %s" % ( exp_dir1, sr2, 1 if if_f0_3 else 0, batch_size12, total_epoch11, save_epoch10, ("-pg %s" % pretrained_G14) if pretrained_G14 != "" else "", ("-pd %s" % pretrained_D15) if pretrained_D15 != "" else "", 1 if if_save_latest13 == True else 0, 1 if if_cache_gpu17 == True else 0, 1 if if_save_every_weights18 == True else 0, version19, ) ) yield get_info_str(cmd) p = Popen(cmd, shell=True, cwd=now_dir) p.wait() yield get_info_str(i18n("训练结束, 您可查看控制台训练日志或实验文件夹下的train.log")) #######step3b:训练索引 npys = [] listdir_res = list(os.listdir(feature_dir)) for name in sorted(listdir_res): phone = np.load("%s/%s" % (feature_dir, name)) npys.append(phone) big_npy = np.concatenate(npys, 0) big_npy_idx = np.arange(big_npy.shape[0]) np.random.shuffle(big_npy_idx) big_npy = big_npy[big_npy_idx] np.save("%s/total_fea.npy" % model_log_dir, big_npy) # n_ivf = 
big_npy.shape[0] // 39 n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39) yield get_info_str("%s,%s" % (big_npy.shape, n_ivf)) index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf) yield get_info_str("training index") index_ivf = faiss.extract_index_ivf(index) # index_ivf.nprobe = 1 index.train(big_npy) faiss.write_index( index, "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index" % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), ) yield get_info_str("adding index") batch_size_add = 8192 for i in range(0, big_npy.shape[0], batch_size_add): index.add(big_npy[i : i + batch_size_add]) faiss.write_index( index, "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index" % (model_log_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19), ) yield get_info_str( "成功构建索引, added_IVF%s_Flat_nprobe_%s_%s_%s.index" % (n_ivf, index_ivf.nprobe, exp_dir1, version19) ) yield get_info_str(i18n("全流程结束!")) def whethercrepeornah(radio): mango = True if radio == 'mangio-crepe' or radio == 'mangio-crepe-tiny' else False return ({"visible": mango, "__type__": "update"}) # ckpt_path2.change(change_info_,[ckpt_path2],[sr__,if_f0__]) def change_info_(ckpt_path): if ( os.path.exists(ckpt_path.replace(os.path.basename(ckpt_path), "train.log")) == False ): return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} try: with open( ckpt_path.replace(os.path.basename(ckpt_path), "train.log"), "r" ) as f: info = eval(f.read().strip("\n").split("\n")[0].split("\t")[-1]) sr, f0 = info["sample_rate"], info["if_f0"] version = "v2" if ("version" in info and info["version"] == "v2") else "v1" return sr, str(f0), version except: traceback.print_exc() return {"__type__": "update"}, {"__type__": "update"}, {"__type__": "update"} from lib.infer_pack.models_onnx import SynthesizerTrnMsNSFsidM def export_onnx(ModelPath, ExportedPath, MoeVS=True): cpt = torch.load(ModelPath, map_location="cpu") cpt["config"][-3] = 
cpt["weight"]["emb_g.weight"].shape[0] # n_spk hidden_channels = 256 if cpt.get("version","v1")=="v1"else 768#cpt["config"][-2] # hidden_channels,为768Vec做准备 test_phone = torch.rand(1, 200, hidden_channels) # hidden unit test_phone_lengths = torch.tensor([200]).long() # hidden unit 长度(貌似没啥用) test_pitch = torch.randint(size=(1, 200), low=5, high=255) # 基频(单位赫兹) test_pitchf = torch.rand(1, 200) # nsf基频 test_ds = torch.LongTensor([0]) # 说话人ID test_rnd = torch.rand(1, 192, 200) # 噪声(加入随机因子) device = "cpu" # 导出时设备(不影响使用模型) net_g = SynthesizerTrnMsNSFsidM( *cpt["config"], is_half=False,version=cpt.get("version","v1") ) # fp32导出(C++要支持fp16必须手动将内存重新排列所以暂时不用fp16) net_g.load_state_dict(cpt["weight"], strict=False) input_names = ["phone", "phone_lengths", "pitch", "pitchf", "ds", "rnd"] output_names = [ "audio", ] # net_g.construct_spkmixmap(n_speaker) 多角色混合轨道导出 torch.onnx.export( net_g, ( test_phone.to(device), test_phone_lengths.to(device), test_pitch.to(device), test_pitchf.to(device), test_ds.to(device), test_rnd.to(device), ), ExportedPath, dynamic_axes={ "phone": [1], "pitch": [1], "pitchf": [1], "rnd": [2], }, do_constant_folding=False, opset_version=16, verbose=False, input_names=input_names, output_names=output_names, ) return "Finished" #region RVC WebUI App def get_presets(): data = None with open('../inference-presets.json', 'r') as file: data = json.load(file) preset_names = [] for preset in data['presets']: preset_names.append(preset['name']) return preset_names def change_choices2(): audio_files=[] for filename in os.listdir("./audios"): if filename.endswith(('.wav','.mp3','.ogg','.flac','.m4a','.aac','.mp4')): audio_files.append(os.path.join('./audios',filename).replace('\\', '/')) return {"choices": sorted(audio_files), "__type__": "update"}, {"__type__": "update"} audio_files=[] for filename in os.listdir("./audios"): if filename.endswith(('.wav','.mp3','.ogg','.flac','.m4a','.aac','.mp4')): audio_files.append(os.path.join('./audios',filename).replace('\\', 
def get_index():
    """Return an ``.index`` file for the alphabetically first model in ``names``.

    The model's folder is ``./logs/<name up to the first dot>``; the first
    entry of that folder ending in ``.index`` is returned.

    Returns:
        The index path, or ``''`` when there is no model, no log folder or no
        index file (every not-found case now yields ``''``).
    """
    first_model = check_for_name()
    if first_model == '':
        return ''
    chosen_model = first_model.split(".")[0]
    logs_path = "./logs/" + chosen_model
    if not os.path.isdir(logs_path):
        return ''
    for file in os.listdir(logs_path):
        if file.endswith(".index"):
            return os.path.join(logs_path, file)
    return ''


def get_indexes():
    """Collect every ``.index`` file found anywhere under ``./logs/``.

    Returns:
        A list of paths, or ``''`` when none were found.
    """
    indexes_list = [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk("./logs/")
        for filename in filenames
        if filename.endswith(".index")
    ]
    return indexes_list if indexes_list else ''


def get_name():
    """Return the alphabetically first entry of ``audio_files``, or ``''`` if it is empty."""
    return sorted(audio_files)[0] if audio_files else ''


def save_to_wav(record_button):
    """Move a recorded or generated file into ``./audios`` under a timestamped name.

    Args:
        record_button: Path of the file to move, or None.

    Returns:
        The new path ``./audios/<YYYY-mm-dd_HH-MM-SS>.wav``, or None when
        *record_button* is None. The file is only renamed; its content is not
        converted.
    """
    if record_button is None:
        return None
    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + '.wav'
    new_path = './audios/' + new_name
    # Without the folder the move would fail, so make sure it is there.
    os.makedirs('./audios', exist_ok=True)
    shutil.move(record_button, new_path)
    return new_path


def save_to_wav2(dropbox):
    """Move an uploaded file into ``./audios`` keeping its base name.

    Args:
        dropbox: Object whose ``name`` attribute is the path of the upload.

    Returns:
        The path of the file inside ``./audios``.
    """
    file_path = dropbox.name
    # If ./audios were missing, shutil.move would rename the upload to a plain
    # file called "audios" and the returned path would not exist.
    os.makedirs('./audios', exist_ok=True)
    shutil.move(file_path, './audios')
    return os.path.join('./audios', os.path.basename(file_path))


def match_index(sid0):
    """Return an ``.index`` file from the log folder that matches a model name.

    Args:
        sid0: Model file name; the part before the first dot names the folder
            under ``./logs/``.

    Returns:
        The first path in that folder ending in ``.index``, or ``''`` when the
        folder is missing or holds no index file.
    """
    folder = sid0.split(".")[0]
    parent_dir = "./logs/" + folder
    if os.path.isdir(parent_dir):
        for filename in os.listdir(parent_dir):
            if filename.endswith(".index"):
                return os.path.join(parent_dir, filename)
    return ''


def check_for_name():
    """Return the alphabetically first entry of ``names``, or ``''`` if it is empty."""
    return sorted(names)[0] if names else ''


def download_from_url(url, model):
    """Download a zipped model and install its weights and index files.

    Google Drive links are fetched with ``gdown``, mega.nz links with ``Mega``
    and everything else with ``wget``. Every ``.zip`` that ends up in ``./zips``
    is unpacked into ``./unzips``; ``.index`` files are copied to
    ``./logs/<model>`` and ``.pth`` files whose name contains neither ``G_``
    nor ``D_`` are copied to ``./weights/<model>.pth``. The two scratch
    folders are removed again on every exit path.

    Args:
        url: Link to the archive; surrounding whitespace is ignored.
        model: Name under which the model is stored.

    Returns:
        A status message for the user.
    """
    url = (url or '').strip()
    if url == '':
        return "URL cannot be left empty."
    if not model:
        return "You need to name your model. For example: My-Model"

    # Start from empty scratch folders so leftovers of an earlier run cannot
    # be mistaken for this download.
    for directory in ("zips", "unzips"):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory, exist_ok=True)

    archive_path = './zips/' + model + '.zip'
    try:
        if "drive.google.com" in url:
            subprocess.run(["gdown", url, "--fuzzy", "-O", archive_path])
        elif "mega.nz" in url:
            Mega().download_url(url, './zips')
        else:
            subprocess.run(["wget", url, "-O", archive_path])

        # Decide "no zip" from the whole listing: an empty folder must be
        # reported, and a stray non-zip file must not hide a real archive.
        archives = [name for name in os.listdir("./zips") if name.endswith(".zip")]
        if not archives:
            return "No zipfile found."
        for name in archives:
            shutil.unpack_archive(os.path.join("./zips/", name), "./unzips", 'zip')

        for root, _dirs, files in os.walk('./unzips'):
            for file in files:
                file_path = os.path.join(root, file)
                if file.endswith(".index"):
                    # exist_ok: the folder may exist from an earlier download
                    # or from a previous index file of the same archive.
                    os.makedirs(f'./logs/{model}', exist_ok=True)
                    shutil.copy2(file_path, f'./logs/{model}')
                elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                    shutil.copy(file_path, f'./weights/{model}.pth')
        return "Model downloaded, you can go back to the inference page!"
    except Exception:
        # Keep the friendly message for the UI but leave the cause on the console.
        traceback.print_exc()
        return "ERROR - The download failed. Check if the link is valid."
    finally:
        shutil.rmtree("zips", ignore_errors=True)
        shutil.rmtree("unzips", ignore_errors=True)
def success_message(face):
    """Return the upload confirmation for *face* plus the string ``'None'``.

    Args:
        face: Object whose ``name`` attribute is shown in the message.
    """
    return f'{face.name} has been uploaded.', 'None'


# Preset face videos selectable by name in mouth().
_PRESET_FACES = {
    'Ben Shapiro': '/content/wav2lip-HD/inputs/ben-shapiro-10.mp4',
    'Andrew Tate': '/content/wav2lip-HD/inputs/tate-7.mp4',
}


def mouth(size, face, voice, faces):
    """Run the Wav2Lip ``inference.py`` script on a face video and an audio file.

    Args:
        size: ``'Half'`` selects a resize factor of 2; anything else selects 1.
        face: Uploaded file object; its ``name`` is used when *faces* is
            ``'None'``.
        voice: Path of the audio file passed as ``--audio``.
        faces: ``'None'`` to use the uploaded face, or the name of a preset.

    Returns:
        The fixed output path and the message ``'Animation completed.'``. The
        script's exit status is not inspected.

    Raises:
        ValueError: If *faces* is neither ``'None'`` nor a known preset
            (previously this surfaced as an unbound local variable).
    """
    resize_factor = 2 if size == 'Half' else 1
    if faces == 'None':
        character = face.name
    elif faces in _PRESET_FACES:
        character = _PRESET_FACES[faces]
    else:
        raise ValueError(f"Unknown face preset: {faces!r}")

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters from being split or interpreted.
    command = [
        "python", "inference.py",
        "--checkpoint_path", "checkpoints/wav2lip.pth",
        "--face", str(character),
        "--audio", str(voice),
        "--pads", "0", "20", "0", "0",
        "--outfile", "/content/wav2lip-HD/outputs/result.mp4",
        "--fps", "24",
        "--resize_factor", str(resize_factor),
    ]
    subprocess.run(command, cwd='/content/wav2lip-HD/Wav2Lip-master')
    return '/content/wav2lip-HD/outputs/result.mp4', 'Animation completed.'


eleven_voices = ['Adam','Antoni','Josh','Arnold','Sam','Bella','Rachel','Domi','Elli']
eleven_voices_ids=['pNInz6obpgDQGcFmaJgB','ErXwobaYiN019PkySvjV','TxGEqnHWrfWFTfGW9XjX','VR6AewLTigWG4xSOukaG','yoZ06aMxZJJ28mfd3POQ','EXAVITQu4vr4xnSDxMaL','21m00Tcm4TlvDq8ikWAM','AZnzlk1XvdvUeBnXmlld','MF3mGyEYCl7XYWbV9V6O']
# Voice name -> ElevenLabs voice id, paired by position in the two lists above.
chosen_voice = dict(zip(eleven_voices, eleven_voices_ids))


def stoptraining(mim):
    """Signal the training process to stop and swap two controls' visibility.

    When ``int(mim) == 1`` the stop flag is written through ``CSVutil`` and
    ``SIGTERM`` is sent to the module-level ``PID``; any failure is printed
    and otherwise ignored.

    Returns:
        Two update dicts: the first hides its control, the second shows its
        control.
    """
    if int(mim) == 1:
        try:
            CSVutil('csvdb/stop.csv', 'w+', 'stop', 'True')
            os.kill(PID, signal.SIGTERM)
        except Exception as e:
            print(f"Couldn't click due to {e}")
    return (
        {"visible": False, "__type__": "update"},
        {"visible": True, "__type__": "update"},
    )


def elevenTTS(xiapi, text, id, lang):
    """Synthesize *text* with ElevenLabs, or with gTTS when no key/voice is given.

    Args:
        xiapi: ElevenLabs API key; ``''`` selects the gTTS fallback.
        text: Text to speak.
        id: Voice name, a key of ``chosen_voice``; ``''`` selects the gTTS
            fallback.
        lang: Language code. ``'en'`` selects ``eleven_monolingual_v1``, any
            other value ``eleven_multilingual_v1``; it is also passed to gTTS.

    Returns:
        The saved audio path twice, as produced by ``save_to_wav``.

    Raises:
        KeyError: If *id* is not a known voice name.
        requests.HTTPError: If ElevenLabs answers with an error status; the
            error body is no longer written out as if it were audio.
    """
    if xiapi != '' and id != '':
        choice = chosen_voice[id]
        CHUNK_SIZE = 1024
        url = f"https://api.elevenlabs.io/v1/text-to-speech/{choice}"
        headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": xiapi
        }
        # The two request bodies differ only in the model id.
        model_id = "eleven_monolingual_v1" if lang == 'en' else "eleven_multilingual_v1"
        data = {
            "text": text,
            "model_id": model_id,
            "voice_settings": {
                "stability": 0.5,
                "similarity_boost": 0.5
            }
        }
        # stream=True lets iter_content write the body chunk by chunk; the
        # timeout keeps a stalled connection from hanging the handler.
        with requests.post(url, json=data, headers=headers, stream=True, timeout=120) as response:
            response.raise_for_status()
            with open('./temp_eleven.mp3', 'wb') as f:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
        aud_path = save_to_wav('./temp_eleven.mp3')
    else:
        tts = gTTS(text, lang=lang)
        tts.save('./temp_gTTS.mp3')
        aud_path = save_to_wav('./temp_gTTS.mp3')
    return aud_path, aud_path


def ilariaTTS(text, ttsvoice):
    """Synthesize *text* with edge-tts and store the result via ``save_to_wav``.

    Args:
        text: Text to speak.
        ttsvoice: Key of ``language_dict`` selecting the edge-tts voice.

    Returns:
        The saved audio path twice.
    """
    vo = language_dict[ttsvoice]
    asyncio.run(edge_tts.Communicate(text, vo).save("./temp_ilaria.mp3"))
    aud_path = save_to_wav('./temp_ilaria.mp3')
    return aud_path, aud_path


def upload_to_dataset(files, dir):
    """Copy uploaded files into a dataset folder, creating it when needed.

    Args:
        files: Iterable of objects whose ``name`` attribute is a file path;
            None is treated as "no files".
        dir: Target folder; ``''`` means ``./dataset``.

    Returns:
        A message stating how many files were copied and where.
    """
    if dir == '':
        dir = './dataset'
    os.makedirs(dir, exist_ok=True)
    count = 0
    for file in files or []:
        shutil.copy2(file.name, dir)
        count += 1
    return f' {count} files uploaded to {dir}.'


def zip_downloader(model):
    """Return the weight file and, if present, the "added" index file of a model.

    Args:
        model: Model name; weights are expected at ``./weights/<model>.pth``
            and index files in ``./logs/<model>``.

    Returns:
        ``(update dict, message)`` when the weights are missing,
        ``([weights, index], "Done")`` when an index file whose name contains
        ``added`` exists, and ``(weights, "Could not find Index file.")``
        otherwise, including when the log folder itself is missing.
    """
    weights_path = f'./weights/{model}.pth'
    if not os.path.exists(weights_path):
        return {"__type__": "update"}, f'Make sure the Voice Name is correct. I could not find {model}.pth'

    logs_dir = f'./logs/{model}'
    log_file = None
    if os.path.isdir(logs_dir):
        for file in os.listdir(logs_dir):
            if file.endswith('.index') and 'added' in file:
                log_file = file  # as before, the last match in listing order wins
    if log_file is not None:
        return [weights_path, f'./logs/{model}/{log_file}'], "Done"
    return weights_path, "Could not find Index file."