Spaces:
Running
Running
import gradio as gr | |
from rvc_infer import infer_audio, get_current_models | |
import os | |
import re | |
import random | |
from scipy.io.wavfile import write | |
from scipy.io.wavfile import read | |
import numpy as np | |
import yt_dlp | |
import subprocess | |
import zipfile | |
import shutil | |
import urllib | |
# Startup side effect: announce and run the helper script before the UI is
# built (presumably it fetches the model files the message refers to).
print("downloading RVC models")
os.system("python dowoad_param.py")

# Voice models are looked up in "<parent of this file's directory>/models".
_APP_DIR = os.path.dirname(os.path.abspath(__file__))
BASE_DIR = os.path.dirname(_APP_DIR)
rvc_models_dir = os.path.join(BASE_DIR, 'models')
def update_models_list():
    """Build a Gradio update carrying the current voice-model choices.

    Returns:
        The result of ``gr.update(choices=...)`` where the choices are
        whatever ``get_current_models(rvc_models_dir)`` returns.
    """
    return gr.update(choices=get_current_models(rvc_models_dir))
def extract_zip(
    extraction_folder,
    zip_name,
    min_model_size=1024 * 1024 * 40,
    min_index_size=1024 * 100,
):
    """Unpack a voice-model archive and flatten it into ``extraction_folder``.

    The archive is extracted into a newly created ``extraction_folder``; the
    ``.pth`` model (and the ``.index`` file, if any) are moved to the top
    level of that folder and every sub-directory is then deleted. The archive
    itself is removed once extraction has been attempted.

    If anything fails after the folder was created, the folder is removed
    again so that a later attempt with the same name is not rejected because
    of a half-populated directory.

    Args:
        extraction_folder: Directory to create and fill. Must not exist yet.
        zip_name: Path of the zip archive to extract; it is deleted.
        min_model_size: A ``.pth`` file must be strictly larger than this
            many bytes to count as the model (default 40 MiB).
        min_index_size: An ``.index`` file must be strictly larger than this
            many bytes to count as the index (default 100 KiB).

    Raises:
        FileExistsError: If ``extraction_folder`` already exists.
        gr.Error: If no sufficiently large ``.pth`` file is in the archive.
        zipfile.BadZipFile, OSError: Propagated from extraction/file moves.
    """
    # Deliberately outside the try block: when the folder already exists,
    # makedirs raises and the pre-existing directory must not be deleted.
    os.makedirs(extraction_folder)

    try:
        try:
            with zipfile.ZipFile(zip_name, 'r') as zip_ref:
                zip_ref.extractall(extraction_folder)
        finally:
            # The archive is useless after this point, whether or not the
            # extraction succeeded, so never leave it behind.
            if os.path.exists(zip_name):
                os.remove(zip_name)

        index_filepath, model_filepath = None, None
        for root, _dirs, files in os.walk(extraction_folder):
            for name in files:
                candidate = os.path.join(root, name)
                # When several files qualify, the last one visited wins.
                if name.endswith('.index') and os.stat(candidate).st_size > min_index_size:
                    index_filepath = candidate
                if name.endswith('.pth') and os.stat(candidate).st_size > min_model_size:
                    model_filepath = candidate

        if not model_filepath:
            # The folder is removed below, so point the user at the archive
            # rather than at a directory that will no longer exist.
            raise gr.Error(
                'No .pth model file was found in the extracted zip. '
                f'Please check the archive used for {extraction_folder}.'
            )

        # Move model and index file to the top level of the extraction folder.
        for found in (model_filepath, index_filepath):
            if found:
                os.rename(found, os.path.join(extraction_folder, os.path.basename(found)))

        # Remove any unnecessary nested folders.
        for entry in os.listdir(extraction_folder):
            entry_path = os.path.join(extraction_folder, entry)
            if os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
    except Exception:
        shutil.rmtree(extraction_folder, ignore_errors=True)
        raise
def download_online_model(url, dir_name, progress=gr.Progress()):
    """Download a zipped voice model and install it under ``rvc_models_dir``.

    Args:
        url: Location of the zip archive. For ``pixeldrain.com`` links the
            last path segment is used as the file id of the API endpoint.
        dir_name: Name of the new model folder inside ``rvc_models_dir``.
            Must be a single, non-empty path component.
        progress: Gradio progress tracker used to report the two stages.

    Returns:
        A success message naming ``dir_name``.

    Raises:
        gr.Error: For an invalid or already used ``dir_name``, a URL without
            a usable file name, or any failure while downloading/extracting
            (the original exception is chained as the cause).
    """
    # `import urllib` does not guarantee that the submodules are loaded, so
    # import the ones used here explicitly.
    import urllib.parse
    import urllib.request

    try:
        progress(0, desc=f'[~] Downloading voice model with name {dir_name}...')

        # dir_name comes straight from a text box: refuse empty names and
        # anything that would resolve outside (or deeper than) the models dir.
        if (
            not dir_name
            or not dir_name.strip()
            or dir_name in ('.', '..')
            or os.path.basename(dir_name) != dir_name
        ):
            raise gr.Error('Please enter a valid directory name without any path separators.')

        url = (url or '').strip()
        # Use only the URL path so a query string or fragment never ends up
        # in the local file name or in the pixeldrain file id.
        zip_name = os.path.basename(urllib.parse.urlparse(url).path.rstrip('/'))
        if not zip_name:
            raise gr.Error('Could not determine a file name from the model URL.')

        extraction_folder = os.path.join(rvc_models_dir, dir_name)
        if os.path.exists(extraction_folder):
            raise gr.Error(f'Voice model directory {dir_name} already exists! Choose a different name for your voice model.')

        if 'pixeldrain.com' in url:
            url = f'https://pixeldrain.com/api/file/{zip_name}'

        urllib.request.urlretrieve(url, zip_name)

        progress(0.5, desc='[~] Extracting zip...')
        extract_zip(extraction_folder, zip_name)
        return f'[+] {dir_name} Model successfully downloaded!'
    except gr.Error:
        # Already a user-facing error; pass it through unchanged.
        raise
    except Exception as e:
        raise gr.Error(str(e)) from e
def download_audio(url):
    """Download the audio track behind ``url`` and load it as 16-bit samples.

    yt_dlp saves the best available audio under ``ytdl/`` and its
    ``FFmpegExtractAudio`` post-processor is asked for a ``wav`` file, which
    is then read back with ``scipy.io.wavfile.read``.

    Args:
        url: Address handed to ``yt_dlp`` for extraction and download.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is the wav data
        converted to a ``numpy`` array of dtype ``int16``.
    """
    wav_postprocessor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [wav_postprocessor],
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)
        # The prepared name carries the source extension; swap it for the
        # ".wav" requested from the post-processor.
        stem = downloader.prepare_filename(info).rsplit('.', 1)[0]
        wav_path = stem + '.wav'
        rate, raw_samples = read(wav_path)
        return rate, np.asarray(raw_samples, dtype=np.int16)
# Custom stylesheet handed to gr.Blocks (currently just a newline).
CSS = """
"""

# Gradio UI: an inference tab, a model-download tab and a credits tab.
# NOTE(review): the nesting was reconstructed from a copy of this file that
# had lost its indentation; in particular, confirm whether "Advanced
# Settings" belongs next to or inside the "Settings" accordion.
with gr.Blocks(theme="Hev832/Applio", fill_width=True, css=CSS) as demo:
    gr.Markdown("# RVC INFER DEMOS ")
    gr.Markdown("# recommended using colab version with more feature!<br> [![Open In Collab](https://img.shields.io/badge/google_colab-F9AB00?style=flat-square&logo=googlecolab&logoColor=white)](https://colab.research.google.com/drive/1bM1LB2__WNFxX8pyZmUPQZYq7dg58YWG?usp=sharing) ")

    with gr.Tab("Inferenece"):
        gr.Markdown("in progress")
        # Created without choices; they are filled in by the refresh button.
        model_name = gr.Dropdown(label='Voice Models', info='Models folder "rvc_infer --> models". After new models are added into this folder, click the refresh button')
        ref_btn = gr.Button('Refresh Models', variant='primary')
        input_audio = gr.Audio(label="Input Audio", type="filepath")

        with gr.Accordion("Settings", open=False):
            f0_change = gr.Slider(label="f0 change", minimum=-12, maximum=12, step=1, value=0)
            # NOTE(review): the last choice starts with a space - confirm
            # that infer_audio expects exactly this spelling.
            f0_method = gr.Dropdown(label="f0 method", choices=["rmvpe+", "rmvpe", "fcpe", " hybrid[rmvpe+fcpe]"], value="rmvpe+")
            min_pitch = gr.Textbox(label="min pitch", lines=1, value="-12")
            max_pitch = gr.Textbox(label="max pitch", lines=1, value="12")
            crepe_hop_length = gr.Slider(label="crepe_hop_length", minimum=0, maximum=256, step=1, value=128)
            index_rate = gr.Slider(label="index_rate", minimum=0, maximum=1.0, step=0.01, value=0.75)
            filter_radius = gr.Slider(label="filter_radius", minimum=0, maximum=10.0, step=0.01, value=3)
            rms_mix_rate = gr.Slider(label="rms_mix_rate", minimum=0, maximum=1.0, step=0.01, value=0.25)
            protect = gr.Slider(label="protect", minimum=0, maximum=1.0, step=0.01, value=0.33)

        with gr.Accordion("Advanced Settings", open=False):
            split_infer = gr.Checkbox(label="split_infer", value=False)
            min_silence = gr.Slider(label="min_silence", minimum=0, maximum=1000, step=1, value=500)
            silence_threshold = gr.Slider(label="silence_threshold", minimum=-1000, maximum=1000, step=1, value=-50)
            seek_step = gr.Slider(label="seek_step", minimum=0, maximum=100, step=1, value=0)
            keep_silence = gr.Slider(label="keep_silence", minimum=-1000, maximum=1000, step=1, value=100)
            do_formant = gr.Checkbox(label="do_formant", value=False)
            quefrency = gr.Slider(label="quefrency", minimum=0, maximum=100, step=1, value=0)
            timbre = gr.Slider(label="timbre", minimum=0, maximum=100, step=1, value=1)
            f0_autotune = gr.Checkbox(label="f0_autotune", value=False)
            # Hidden components: fixed values that are still passed on to
            # infer_audio together with the visible settings.
            audio_format = gr.Dropdown(label="audio_format", choices=["wav"], value="wav", visible=False)
            resample_sr = gr.Slider(label="resample_sr", minimum=0, maximum=100, step=1, value=0)
            hubert_model_path = gr.Textbox(label="hubert_model_path", lines=1, value="hubert_base.pt", visible=False)
            rmvpe_model_path = gr.Textbox(label="rmvpe_model_path", lines=1, value="rmvpe.pt", visible=False)
            fcpe_model_path = gr.Textbox(label="fcpe_model_path", lines=1, value="fcpe.pt", visible=False)

        submit_inference = gr.Button('Inference', variant='primary')
        # The first positional argument of gr.Audio is the initial value, so
        # the caption has to be passed as `label`.
        result_audio = gr.Audio(label="Output Audio")

    with gr.Tab("Download Model"):
        gr.Markdown("## Download Model for infernece")
        url_input = gr.Textbox(label="Model URL", placeholder="Enter the URL of the model")
        dir_name_input = gr.Textbox(label="Directory Name", placeholder="Enter the directory name")
        output = gr.Textbox(label="Output Models")
        download_button = gr.Button("Download Model")
        download_button.click(download_online_model, inputs=[url_input, dir_name_input], outputs=output)

    with gr.Tab(" Credits"):
        # Link target corrected from the misspelled "huggingfce.co".
        gr.Markdown(
            "\nthis project made by [Blane187](https://huggingface.co/Blane187) with Improvements by [John6666](https://huggingface.co/John6666)\n"
        )

    ref_btn.click(update_models_list, None, outputs=model_name)

    # The component values are passed to infer_audio positionally, in
    # exactly the order listed in `inputs`.
    gr.on(
        triggers=[submit_inference.click],
        fn=infer_audio,
        inputs=[model_name, input_audio, f0_change, f0_method, min_pitch, max_pitch, crepe_hop_length, index_rate,
                filter_radius, rms_mix_rate, protect, split_infer, min_silence, silence_threshold, seek_step,
                keep_silence, do_formant, quefrency, timbre, f0_autotune, audio_format, resample_sr,
                hubert_model_path, rmvpe_model_path, fcpe_model_path],
        outputs=[result_audio],
        queue=True,
        show_api=True,
        show_progress="full",
    )
# Enable request queuing, then start the server with the app's launch flags.
_launch_options = {"debug": True, "share": True, "show_api": False}
demo.queue()
demo.launch(**_launch_options)