import logging import os import sys import warnings import librosa import numpy as np import soundfile as sf import wget from torch import nn from transformers import HubertModel # Remove this to see warnings about transformers models warnings.filterwarnings("ignore") logging.getLogger("fairseq").setLevel(logging.ERROR) logging.getLogger("faiss.loader").setLevel(logging.ERROR) logging.getLogger("transformers").setLevel(logging.ERROR) logging.getLogger("torch").setLevel(logging.ERROR) now_dir = os.getcwd() sys.path.append(now_dir) base_path = os.path.join(now_dir, "rvc", "models", "formant", "stftpitchshift") stft = base_path + ".exe" if sys.platform == "win32" else base_path class HubertModelWithFinalProj(HubertModel): def __init__(self, config): super().__init__(config) self.final_proj = nn.Linear(config.hidden_size, config.classifier_proj_size) def load_audio_infer( file, sample_rate, **kwargs, ): formant_shifting = kwargs.get("formant_shifting", False) try: file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ") if not os.path.isfile(file): raise FileNotFoundError(f"File not found: {file}") audio, sr = sf.read(file) if len(audio.shape) > 1: audio = librosa.to_mono(audio.T) if sr != sample_rate: audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate) if formant_shifting: formant_qfrency = kwargs.get("formant_qfrency", 0.8) formant_timbre = kwargs.get("formant_timbre", 0.8) from stftpitchshift import StftPitchShift pitchshifter = StftPitchShift(1024, 32, sample_rate) audio = pitchshifter.shiftpitch( audio, factors=1, quefrency=formant_qfrency * 1e-3, distortion=formant_timbre, ) except Exception as error: raise RuntimeError(f"An error occurred loading the audio: {error}") return np.array(audio).flatten() def load_embedding(embedder_model, custom_embedder=None): embedder_root = os.path.join(now_dir, "rvc", "models", "embedders") embedding_list = { "contentvec": os.path.join(embedder_root, "contentvec"), "chinese-hubert-base": os.path.join(embedder_root, "chinese_hubert_base"), "japanese-hubert-base": os.path.join(embedder_root, "japanese_hubert_base"), "korean-hubert-base": os.path.join(embedder_root, "korean_hubert_base"), } online_embedders = { "contentvec": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/contentvec/pytorch_model.bin", "chinese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/chinese_hubert_base/pytorch_model.bin", "japanese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/japanese_hubert_base/pytorch_model.bin", "korean-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/korean_hubert_base/pytorch_model.bin", } config_files = { "contentvec": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/contentvec/config.json", "chinese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/chinese_hubert_base/config.json", "japanese-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/japanese_hubert_base/config.json", "korean-hubert-base": "https://huggingface.co/IAHispano/Applio/resolve/main/Resources/embedders/korean_hubert_base/config.json", } if embedder_model == "custom": if os.path.exists(custom_embedder): model_path = custom_embedder else: print(f"Custom embedder not found: {custom_embedder}, using contentvec") model_path = embedding_list["contentvec"] else: model_path = embedding_list[embedder_model] bin_file = os.path.join(model_path, "pytorch_model.bin") json_file = os.path.join(model_path, "config.json") os.makedirs(model_path, exist_ok=True) if not os.path.exists(bin_file): url = online_embedders[embedder_model] print(f"Downloading {url} to {model_path}...") wget.download(url, out=bin_file) if not os.path.exists(json_file): url = config_files[embedder_model] print(f"Downloading {url} to {model_path}...") wget.download(url, out=json_file) models = HubertModelWithFinalProj.from_pretrained(model_path) return models