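# core.py — command-line entry points for Applio: single and batch RVC
# inference, TTS + voice conversion, dataset preprocessing, feature
# extraction, training, index generation, and assorted utilities.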
import os
import sys
import json
import argparse
import subprocess
import spaces
now_dir = os.getcwd()
sys.path.append(now_dir)
from rvc.configs.config import Config
from rvc.lib.tools.prerequisites_download import prequisites_download_pipeline
from rvc.train.extract.preparing_files import generate_config, generate_filelist
from rvc.lib.tools.pretrained_selector import pretrained_selector
from rvc.train.process.model_blender import model_blender
from rvc.train.process.model_information import model_information
from rvc.train.process.extract_small_model import extract_small_model
from rvc.infer.infer import infer_pipeline
from rvc.lib.tools.analyzer import analyze_audio
from rvc.lib.tools.launch_tensorboard import launch_tensorboard_pipeline
from rvc.lib.tools.model_download import model_download_pipeline
config = Config()
current_script_directory = os.path.dirname(os.path.realpath(__file__))
logs_path = os.path.join(current_script_directory, "logs")
# Get TTS Voices
with open(os.path.join("rvc", "lib", "tools", "tts_voices.json"), "r") as f:
voices_data = json.load(f)
locales = list({voice["Locale"] for voice in voices_data})
# Infer
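# Runs the RVC inference pipeline on a single audio file; returns a status
# message and the converted file's path (extension follows export_format).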
@spaces.GPU
def run_infer_script(
f0up_key,
filter_radius,
index_rate,
rms_mix_rate,
protect,
hop_length,
f0method,
input_path,
output_path,
pth_path,
index_path,
split_audio,
f0autotune,
clean_audio,
clean_strength,
export_format,
embedder_model,
embedder_model_custom,
upscale_audio,
):
f0autotune = "True" if str(f0autotune) == "True" else "False"
clean_audio = "True" if str(clean_audio) == "True" else "False"
upscale_audio = "True" if str(upscale_audio) == "True" else "False"
infer_pipeline(
f0up_key,
filter_radius,
index_rate,
rms_mix_rate,
protect,
hop_length,
f0method,
input_path,
output_path,
pth_path,
index_path,
split_audio,
f0autotune,
clean_audio,
clean_strength,
export_format,
embedder_model,
embedder_model_custom,
upscale_audio,
)
return f"File {input_path} inferred successfully.", output_path.replace(
".wav", f".{export_format.lower()}"
)
# Batch infer
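# Converts every .mp3/.wav/.flac file in input_folder, skipping files whose
# names contain "_output" (results of a previous run).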
def run_batch_infer_script(
f0up_key,
filter_radius,
index_rate,
rms_mix_rate,
protect,
hop_length,
f0method,
input_folder,
output_folder,
pth_path,
index_path,
split_audio,
f0autotune,
clean_audio,
clean_strength,
export_format,
embedder_model,
embedder_model_custom,
upscale_audio,
):
f0autotune = "True" if str(f0autotune) == "True" else "False"
clean_audio = "True" if str(clean_audio) == "True" else "False"
upscale_audio = "True" if str(upscale_audio) == "True" else "False"
audio_files = [
f for f in os.listdir(input_folder) if f.endswith((".mp3", ".wav", ".flac"))
]
print(f"Detected {len(audio_files)} audio files for inference.")
for audio_file in audio_files:
if "_output" in audio_file:
pass
else:
input_path = os.path.join(input_folder, audio_file)
output_file_name = os.path.splitext(os.path.basename(audio_file))[0]
output_path = os.path.join(
output_folder,
f"{output_file_name}_output{os.path.splitext(audio_file)[1]}",
)
print(f"Inferring {input_path}...")
infer_pipeline(
f0up_key,
filter_radius,
index_rate,
rms_mix_rate,
protect,
hop_length,
f0method,
input_path,
output_path,
pth_path,
index_path,
split_audio,
f0autotune,
clean_audio,
clean_strength,
export_format,
embedder_model,
embedder_model_custom,
upscale_audio,
)
return f"Files from {input_folder} inferred successfully."
# TTS
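# Synthesizes tts_text with rvc/lib/tools/tts.py (run as a subprocess), then
# converts the synthesized audio through the RVC inference pipeline.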
def run_tts_script(
tts_text,
tts_voice,
tts_rate,
f0up_key,
filter_radius,
index_rate,
rms_mix_rate,
protect,
hop_length,
f0method,
output_tts_path,
output_rvc_path,
pth_path,
index_path,
split_audio,
f0autotune,
clean_audio,
clean_strength,
export_format,
embedder_model,
embedder_model_custom,
upscale_audio,
):
f0autotune = "True" if str(f0autotune) == "True" else "False"
clean_audio = "True" if str(clean_audio) == "True" else "False"
upscale_audio = "True" if str(upscale_audio) == "True" else "False"
tts_script_path = os.path.join("rvc", "lib", "tools", "tts.py")
if os.path.exists(output_tts_path):
os.remove(output_tts_path)
command_tts = [
"python",
tts_script_path,
tts_text,
tts_voice,
str(tts_rate),
output_tts_path,
]
subprocess.run(command_tts)
infer_pipeline(
f0up_key,
filter_radius,
index_rate,
rms_mix_rate,
protect,
hop_length,
f0method,
output_tts_path,
output_rvc_path,
pth_path,
index_path,
split_audio,
f0autotune,
clean_audio,
clean_strength,
export_format,
embedder_model,
embedder_model_custom,
upscale_audio,
)
return f"Text {tts_text} synthesized successfully.", output_rvc_path.replace(
".wav", f".{export_format.lower()}"
)
# Preprocess
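# Prepares the dataset under logs/<model_name> via rvc/train/preprocess/preprocess.py;
# the "per" value passed depends on whether half precision is enabled.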
def run_preprocess_script(model_name, dataset_path, sampling_rate):
per = 3.0 if config.is_half else 3.7
preprocess_script_path = os.path.join("rvc", "train", "preprocess", "preprocess.py")
command = [
"python",
preprocess_script_path,
*map(
str,
[
os.path.join(logs_path, model_name),
dataset_path,
sampling_rate,
per,
],
),
]
os.makedirs(os.path.join(logs_path, model_name), exist_ok=True)
subprocess.run(command)
return f"Model {model_name} preprocessed successfully."
# Extract
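# Extracts f0 and embedder features for the model, then generates the training
# config and filelist.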
def run_extract_script(
model_name,
rvc_version,
f0method,
hop_length,
sampling_rate,
embedder_model,
embedder_model_custom,
):
model_path = os.path.join(logs_path, model_name)
extract_f0_script_path = os.path.join(
"rvc", "train", "extract", "extract_f0_print.py"
)
extract_feature_script_path = os.path.join(
"rvc", "train", "extract", "extract_feature_print.py"
)
command_1 = [
"python",
extract_f0_script_path,
*map(
str,
[
model_path,
f0method,
hop_length,
],
),
]
command_2 = [
"python",
extract_feature_script_path,
*map(
str,
[
config.device,
"1",
"0",
"0",
model_path,
rvc_version,
"True",
embedder_model,
embedder_model_custom,
],
),
]
subprocess.run(command_1)
subprocess.run(command_2)
generate_config(rvc_version, sampling_rate, model_path)
generate_filelist(f0method, model_path, rvc_version, sampling_rate)
return f"Model {model_name} extracted successfully."
# Train
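# Assembles the argument list for rvc/train/train.py, picking pretrained G/D
# weights via pretrained_selector unless custom paths are given, and builds
# the index file once training completes.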
def run_train_script(
model_name,
rvc_version,
save_every_epoch,
save_only_latest,
save_every_weights,
total_epoch,
sampling_rate,
batch_size,
gpu,
pitch_guidance,
overtraining_detector,
overtraining_threshold,
pretrained,
custom_pretrained,
sync_graph,
g_pretrained_path=None,
d_pretrained_path=None,
):
f0 = 1 if str(pitch_guidance) == "True" else 0
latest = 1 if str(save_only_latest) == "True" else 0
save_every = 1 if str(save_every_weights) == "True" else 0
detector = 1 if str(overtraining_detector) == "True" else 0
sync = 1 if str(sync_graph) == "True" else 0
if str(pretrained) == "True":
if str(custom_pretrained) == "False":
pg, pd = pretrained_selector(f0)[rvc_version][sampling_rate]
else:
if g_pretrained_path is None or d_pretrained_path is None:
raise ValueError(
"Please provide the path to the pretrained G and D models."
)
pg, pd = g_pretrained_path, d_pretrained_path
else:
pg, pd = "", ""
train_script_path = os.path.join("rvc", "train", "train.py")
command = [
"python",
train_script_path,
*map(
str,
[
"-se",
save_every_epoch,
"-te",
total_epoch,
"-pg",
pg,
"-pd",
pd,
"-sr",
sampling_rate,
"-bs",
batch_size,
"-g",
gpu,
"-e",
os.path.join(logs_path, model_name),
"-v",
rvc_version,
"-l",
latest,
"-c",
"0",
"-sw",
save_every,
"-f0",
f0,
"-od",
detector,
"-ot",
overtraining_threshold,
"-sg",
sync,
],
),
]
subprocess.run(command)
run_index_script(model_name, rvc_version)
return f"Model {model_name} trained successfully."
# Index
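# Generates the .index file for a trained model from its logs/<model_name> folder.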
def run_index_script(model_name, rvc_version):
index_script_path = os.path.join("rvc", "train", "process", "extract_index.py")
command = [
"python",
index_script_path,
os.path.join(logs_path, model_name),
rvc_version,
]
subprocess.run(command)
return f"Index file for {model_name} generated successfully."
# Model extract
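# Extracts a small model from a training checkpoint (.pth) with the given
# metadata (sampling rate, pitch guidance, version, epoch, step).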
def run_model_extract_script(
pth_path, model_name, sampling_rate, pitch_guidance, rvc_version, epoch, step
):
f0 = 1 if str(pitch_guidance) == "True" else 0
extract_small_model(
pth_path, model_name, sampling_rate, f0, rvc_version, epoch, step
)
return f"Model {model_name} extracted successfully."
# Model information
def run_model_information_script(pth_path):
print(model_information(pth_path))
# Model blender
def run_model_blender_script(model_name, pth_path_1, pth_path_2, ratio):
message, model_blended = model_blender(model_name, pth_path_1, pth_path_2, ratio)
return message, model_blended
# Tensorboard
def run_tensorboard_script():
launch_tensorboard_pipeline()
# Download
def run_download_script(model_link):
model_download_pipeline(model_link)
return f"Model downloaded successfully."
# Prerequisites
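# Downloads the requested prerequisites: v1/v2 pretrained models, base models,
# and executables.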
def run_prerequisites_script(pretraineds_v1, pretraineds_v2, models, exe):
prequisites_download_pipeline(pretraineds_v1, pretraineds_v2, models, exe)
return "Prerequisites installed successfully."
# Audio analyzer
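# Analyzes an audio file and saves an analysis plot (default logs/audio_analysis.png).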
def run_audio_analyzer_script(input_path, save_plot_path="logs/audio_analysis.png"):
audio_info, plot_path = analyze_audio(input_path, save_plot_path)
print(
f"Audio info of {input_path}: {audio_info}",
f"Audio file {input_path} analyzed successfully. Plot saved at: {plot_path}",
)
return audio_info, plot_path
# API
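# Starts the ASGI app api:app with uvicorn on the given host and port.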
def run_api_script(ip, port):
command = [
"env/Scripts/uvicorn.exe" if os.name == "nt" else "uvicorn",
"api:app",
"--host",
ip,
"--port",
port,
]
subprocess.run(command)
# Parse arguments
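# Each mode is exposed as an argparse subcommand, e.g.:
#   python core.py infer --input_path <in.wav> --output_path <out.wav> \
#       --pth_path <model.pth> --index_path <model.index>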
def parse_arguments():
parser = argparse.ArgumentParser(
description="Run the main.py script with specific parameters."
)
subparsers = parser.add_subparsers(
title="subcommands", dest="mode", help="Choose a mode"
)
# Parser for 'infer' mode
infer_parser = subparsers.add_parser("infer", help="Run inference")
infer_parser.add_argument(
"--f0up_key",
type=str,
help="Value for f0up_key",
choices=[str(i) for i in range(-24, 25)],
default="0",
)
infer_parser.add_argument(
"--filter_radius",
type=str,
help="Value for filter_radius",
choices=[str(i) for i in range(11)],
default="3",
)
infer_parser.add_argument(
"--index_rate",
type=str,
help="Value for index_rate",
choices=[str(i / 10) for i in range(11)],
default="0.3",
)
infer_parser.add_argument(
"--rms_mix_rate",
type=str,
help="Value for rms_mix_rate",
choices=[str(i / 10) for i in range(11)],
default="1",
)
infer_parser.add_argument(
"--protect",
type=str,
help="Value for protect",
choices=[str(i / 10) for i in range(6)],
default="0.33",
)
infer_parser.add_argument(
"--hop_length",
type=str,
help="Value for hop_length",
choices=[str(i) for i in range(1, 513)],
default="128",
)
infer_parser.add_argument(
"--f0method",
type=str,
help="Value for f0method",
choices=[
"pm",
"harvest",
"dio",
"crepe",
"crepe-tiny",
"rmvpe",
"fcpe",
"hybrid[crepe+rmvpe]",
"hybrid[crepe+fcpe]",
"hybrid[rmvpe+fcpe]",
"hybrid[crepe+rmvpe+fcpe]",
],
default="rmvpe",
)
infer_parser.add_argument("--input_path", type=str, help="Input path")
infer_parser.add_argument("--output_path", type=str, help="Output path")
infer_parser.add_argument("--pth_path", type=str, help="Path to the .pth file")
infer_parser.add_argument(
"--index_path",
type=str,
help="Path to the .index file",
)
infer_parser.add_argument(
"--split_audio",
type=str,
help="Enable split audio",
choices=["True", "False"],
default="False",
)
infer_parser.add_argument(
"--f0autotune",
type=str,
help="Enable autotune",
choices=["True", "False"],
default="False",
)
infer_parser.add_argument(
"--clean_audio",
type=str,
help="Enable clean audio",
choices=["True", "False"],
default="False",
)
infer_parser.add_argument(
"--clean_strength",
type=str,
help="Value for clean_strength",
choices=[str(i / 10) for i in range(11)],
default="0.7",
)
infer_parser.add_argument(
"--export_format",
type=str,
help="Export format",
choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
default="WAV",
)
infer_parser.add_argument(
"--embedder_model",
type=str,
help="Embedder model",
choices=["contentvec", "hubert", "custom"],
default="hubert",
)
infer_parser.add_argument(
"--embedder_model_custom",
type=str,
help="Custom Embedder model",
default=None,
)
infer_parser.add_argument(
"--upscale_audio",
type=str,
help="Enable audio upscaling",
choices=["True", "False"],
default="False",
)
# Parser for 'batch_infer' mode
batch_infer_parser = subparsers.add_parser(
"batch_infer", help="Run batch inference"
)
batch_infer_parser.add_argument(
"--f0up_key",
type=str,
help="Value for f0up_key",
choices=[str(i) for i in range(-24, 25)],
default="0",
)
batch_infer_parser.add_argument(
"--filter_radius",
type=str,
help="Value for filter_radius",
choices=[str(i) for i in range(11)],
default="3",
)
batch_infer_parser.add_argument(
"--index_rate",
type=str,
help="Value for index_rate",
choices=[str(i / 10) for i in range(11)],
default="0.3",
)
batch_infer_parser.add_argument(
"--rms_mix_rate",
type=str,
help="Value for rms_mix_rate",
choices=[str(i / 10) for i in range(11)],
default="1",
)
batch_infer_parser.add_argument(
"--protect",
type=str,
help="Value for protect",
choices=[str(i / 10) for i in range(6)],
default="0.33",
)
batch_infer_parser.add_argument(
"--hop_length",
type=str,
help="Value for hop_length",
choices=[str(i) for i in range(1, 513)],
default="128",
)
batch_infer_parser.add_argument(
"--f0method",
type=str,
help="Value for f0method",
choices=[
"pm",
"harvest",
"dio",
"crepe",
"crepe-tiny",
"rmvpe",
"fcpe",
"hybrid[crepe+rmvpe]",
"hybrid[crepe+fcpe]",
"hybrid[rmvpe+fcpe]",
"hybrid[crepe+rmvpe+fcpe]",
],
default="rmvpe",
)
batch_infer_parser.add_argument("--input_folder", type=str, help="Input folder")
batch_infer_parser.add_argument("--output_folder", type=str, help="Output folder")
batch_infer_parser.add_argument(
"--pth_path", type=str, help="Path to the .pth file"
)
batch_infer_parser.add_argument(
"--index_path",
type=str,
help="Path to the .index file",
)
batch_infer_parser.add_argument(
"--split_audio",
type=str,
help="Enable split audio",
choices=["True", "False"],
default="False",
)
batch_infer_parser.add_argument(
"--f0autotune",
type=str,
help="Enable autotune",
choices=["True", "False"],
default="False",
)
batch_infer_parser.add_argument(
"--clean_audio",
type=str,
help="Enable clean audio",
choices=["True", "False"],
default="False",
)
batch_infer_parser.add_argument(
"--clean_strength",
type=str,
help="Value for clean_strength",
choices=[str(i / 10) for i in range(11)],
default="0.7",
)
batch_infer_parser.add_argument(
"--export_format",
type=str,
help="Export format",
choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
default="WAV",
)
batch_infer_parser.add_argument(
"--embedder_model",
type=str,
help="Embedder model",
choices=["contentvec", "hubert", "custom"],
default="hubert",
)
batch_infer_parser.add_argument(
"--embedder_model_custom",
type=str,
help="Custom Embedder model",
default=None,
)
batch_infer_parser.add_argument(
"--upscale_audio",
type=str,
help="Enable audio upscaling",
choices=["True", "False"],
default="False",
)
# Parser for 'tts' mode
tts_parser = subparsers.add_parser("tts", help="Run TTS")
tts_parser.add_argument(
"--tts_text",
type=str,
help="Text to be synthesized",
)
tts_parser.add_argument(
"--tts_voice",
type=str,
help="Voice to be used",
choices=locales,
)
tts_parser.add_argument(
"--tts_rate",
type=str,
help="Increase or decrease TTS speed",
choices=[str(i) for i in range(-100, 100)],
default="0",
)
tts_parser.add_argument(
"--f0up_key",
type=str,
help="Value for f0up_key",
choices=[str(i) for i in range(-24, 25)],
default="0",
)
tts_parser.add_argument(
"--filter_radius",
type=str,
help="Value for filter_radius",
choices=[str(i) for i in range(11)],
default="3",
)
tts_parser.add_argument(
"--index_rate",
type=str,
help="Value for index_rate",
choices=[str(i / 10) for i in range(11)],
default="0.3",
)
tts_parser.add_argument(
"--rms_mix_rate",
type=str,
help="Value for rms_mix_rate",
choices=[str(i / 10) for i in range(11)],
default="1",
)
tts_parser.add_argument(
"--protect",
type=str,
help="Value for protect",
choices=[str(i / 10) for i in range(6)],
default="0.33",
)
tts_parser.add_argument(
"--hop_length",
type=str,
help="Value for hop_length",
choices=[str(i) for i in range(1, 513)],
default="128",
)
tts_parser.add_argument(
"--f0method",
type=str,
help="Value for f0method",
choices=[
"pm",
"harvest",
"dio",
"crepe",
"crepe-tiny",
"rmvpe",
"fcpe",
"hybrid[crepe+rmvpe]",
"hybrid[crepe+fcpe]",
"hybrid[rmvpe+fcpe]",
"hybrid[crepe+rmvpe+fcpe]",
],
default="rmvpe",
)
tts_parser.add_argument("--output_tts_path", type=str, help="Output tts path")
tts_parser.add_argument("--output_rvc_path", type=str, help="Output rvc path")
tts_parser.add_argument("--pth_path", type=str, help="Path to the .pth file")
tts_parser.add_argument(
"--index_path",
type=str,
help="Path to the .index file",
)
tts_parser.add_argument(
"--split_audio",
type=str,
help="Enable split audio",
choices=["True", "False"],
default="False",
)
tts_parser.add_argument(
"--f0autotune",
type=str,
help="Enable autotune",
choices=["True", "False"],
default="False",
)
tts_parser.add_argument(
"--clean_audio",
type=str,
help="Enable clean audio",
choices=["True", "False"],
default="False",
)
tts_parser.add_argument(
"--clean_strength",
type=str,
help="Value for clean_strength",
choices=[str(i / 10) for i in range(11)],
default="0.7",
)
tts_parser.add_argument(
"--export_format",
type=str,
help="Export format",
choices=["WAV", "MP3", "FLAC", "OGG", "M4A"],
default="WAV",
)
tts_parser.add_argument(
"--embedder_model",
type=str,
help="Embedder model",
choices=["contentvec", "hubert", "custom"],
default="hubert",
)
tts_parser.add_argument(
"--embedder_model_custom",
type=str,
help="Custom Embedder model",
default=None,
)
tts_parser.add_argument(
"--upscale_audio",
type=str,
help="Enable audio upscaling",
choices=["True", "False"],
default="False",
)
# Parser for 'preprocess' mode
preprocess_parser = subparsers.add_parser("preprocess", help="Run preprocessing")
preprocess_parser.add_argument("--model_name", type=str, help="Name of the model")
preprocess_parser.add_argument(
"--dataset_path",
type=str,
help="Path to the dataset",
)
preprocess_parser.add_argument(
"--sampling_rate",
type=str,
help="Sampling rate",
choices=["32000", "40000", "48000"],
)
# Parser for 'extract' mode
extract_parser = subparsers.add_parser("extract", help="Run extract")
extract_parser.add_argument(
"--model_name",
type=str,
help="Name of the model",
)
extract_parser.add_argument(
"--rvc_version",
type=str,
help="Version of the model",
choices=["v1", "v2"],
default="v2",
)
extract_parser.add_argument(
"--f0method",
type=str,
help="Value for f0method",
choices=[
"pm",
"harvest",
"dio",
"crepe",
"crepe-tiny",
"rmvpe",
],
default="rmvpe",
)
extract_parser.add_argument(
"--hop_length",
type=str,
help="Value for hop_length",
choices=[str(i) for i in range(1, 513)],
default="128",
)
extract_parser.add_argument(
"--sampling_rate",
type=str,
help="Sampling rate",
choices=["32000", "40000", "48000"],
)
extract_parser.add_argument(
"--embedder_model",
type=str,
help="Embedder model",
choices=["contentvec", "hubert", "custom"],
default="hubert",
)
extract_parser.add_argument(
"--embedder_model_custom",
type=str,
help="Custom Embedder model",
default=None,
)
# Parser for 'train' mode
train_parser = subparsers.add_parser("train", help="Run training")
train_parser.add_argument(
"--model_name",
type=str,
help="Name of the model",
)
train_parser.add_argument(
"--rvc_version",
type=str,
help="Version of the model",
choices=["v1", "v2"],
default="v2",
)
train_parser.add_argument(
"--save_every_epoch",
type=str,
help="Save every epoch",
choices=[str(i) for i in range(1, 101)],
)
train_parser.add_argument(
"--save_only_latest",
type=str,
help="Save weight only at last epoch",
choices=["True", "False"],
default="False",
)
train_parser.add_argument(
"--save_every_weights",
type=str,
help="Save weight every epoch",
choices=["True", "False"],
default="True",
)
train_parser.add_argument(
"--total_epoch",
type=str,
help="Total epoch",
choices=[str(i) for i in range(1, 10001)],
default="1000",
)
train_parser.add_argument(
"--sampling_rate",
type=str,
help="Sampling rate",
choices=["32000", "40000", "48000"],
)
train_parser.add_argument(
"--batch_size",
type=str,
help="Batch size",
choices=[str(i) for i in range(1, 51)],
default="8",
)
train_parser.add_argument(
"--gpu",
type=str,
help="GPU number",
default="0",
)
train_parser.add_argument(
"--pitch_guidance",
type=str,
help="Pitch guidance",
choices=["True", "False"],
default="True",
)
train_parser.add_argument(
"--pretrained",
type=str,
help="Pretrained",
choices=["True", "False"],
default="True",
)
train_parser.add_argument(
"--custom_pretrained",
type=str,
help="Custom pretrained",
choices=["True", "False"],
default="False",
)
train_parser.add_argument(
"--g_pretrained_path",
type=str,
nargs="?",
default=None,
help="Path to the pretrained G file",
)
train_parser.add_argument(
"--d_pretrained_path",
type=str,
nargs="?",
default=None,
help="Path to the pretrained D file",
)
train_parser.add_argument(
"--overtraining_detector",
type=str,
help="Overtraining detector",
choices=["True", "False"],
default="False",
)
train_parser.add_argument(
"--overtraining_threshold",
type=str,
help="Overtraining threshold",
choices=[str(i) for i in range(1, 101)],
default="50",
)
train_parser.add_argument(
"--sync_graph",
type=str,
help="Sync graph",
choices=["True", "False"],
default="False",
)
# Parser for 'index' mode
index_parser = subparsers.add_parser("index", help="Generate index file")
index_parser.add_argument(
"--model_name",
type=str,
help="Name of the model",
)
index_parser.add_argument(
"--rvc_version",
type=str,
help="Version of the model",
choices=["v1", "v2"],
default="v2",
)
# Parser for 'model_extract' mode
model_extract_parser = subparsers.add_parser("model_extract", help="Extract model")
model_extract_parser.add_argument(
"--pth_path",
type=str,
help="Path to the .pth file",
)
model_extract_parser.add_argument(
"--model_name",
type=str,
help="Name of the model",
)
model_extract_parser.add_argument(
"--sampling_rate",
type=str,
help="Sampling rate",
choices=["40000", "48000"],
)
model_extract_parser.add_argument(
"--pitch_guidance",
type=str,
help="Pitch guidance",
choices=["True", "False"],
)
model_extract_parser.add_argument(
"--rvc_version",
type=str,
help="Version of the model",
choices=["v1", "v2"],
default="v2",
)
model_extract_parser.add_argument(
"--epoch",
type=str,
help="Epochs of the model",
choices=[str(i) for i in range(1, 10001)],
)
model_extract_parser.add_argument(
"--step",
type=str,
help="Steps of the model",
)
# Parser for 'model_information' mode
model_information_parser = subparsers.add_parser(
"model_information", help="Print model information"
)
model_information_parser.add_argument(
"--pth_path",
type=str,
help="Path to the .pth file",
)
# Parser for 'model_blender' mode
model_blender_parser = subparsers.add_parser(
"model_blender", help="Fuse two models"
)
model_blender_parser.add_argument(
"--model_name",
type=str,
help="Name of the model",
)
model_blender_parser.add_argument(
"--pth_path_1",
type=str,
help="Path to the first .pth file",
)
model_blender_parser.add_argument(
"--pth_path_2",
type=str,
help="Path to the second .pth file",
)
model_blender_parser.add_argument(
"--ratio",
type=str,
help="Value for blender ratio",
choices=[str(i / 10) for i in range(11)],
default="0.5",
)
# Parser for 'tensorboard' mode
subparsers.add_parser("tensorboard", help="Run tensorboard")
# Parser for 'download' mode
download_parser = subparsers.add_parser("download", help="Download models")
download_parser.add_argument(
"--model_link",
type=str,
help="Link of the model",
)
# Parser for 'prerequisites' mode
prerequisites_parser = subparsers.add_parser(
"prerequisites", help="Install prerequisites"
)
prerequisites_parser.add_argument(
"--pretraineds_v1",
type=str,
choices=["True", "False"],
default="True",
help="Download pretrained models for v1",
)
prerequisites_parser.add_argument(
"--pretraineds_v2",
type=str,
choices=["True", "False"],
default="True",
help="Download pretrained models for v2",
)
prerequisites_parser.add_argument(
"--models",
type=str,
choices=["True", "False"],
default="True",
help="Donwload models",
)
prerequisites_parser.add_argument(
"--exe",
type=str,
choices=["True", "False"],
default="True",
help="Download executables",
)
# Parser for 'audio_analyzer' mode
audio_analyzer = subparsers.add_parser("audio_analyzer", help="Run audio analyzer")
audio_analyzer.add_argument(
"--input_path",
type=str,
help="Path to the input audio file",
)
# Parser for 'api' mode
api_parser = subparsers.add_parser("api", help="Run the API")
api_parser.add_argument(
"--host", type=str, help="Host address", default="127.0.0.1"
)
api_parser.add_argument("--port", type=str, help="Port", default="8000")
return parser.parse_args()
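# Entry point: dispatches the selected subcommand; all argparse values are
# forwarded as strings.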
def main():
if len(sys.argv) == 1:
print("Please run the script with '-h' for more information.")
sys.exit(1)
args = parse_arguments()
try:
if args.mode == "infer":
run_infer_script(
str(args.f0up_key),
str(args.filter_radius),
str(args.index_rate),
str(args.rms_mix_rate),
str(args.protect),
str(args.hop_length),
str(args.f0method),
str(args.input_path),
str(args.output_path),
str(args.pth_path),
str(args.index_path),
str(args.split_audio),
str(args.f0autotune),
str(args.clean_audio),
str(args.clean_strength),
str(args.export_format),
str(args.embedder_model),
str(args.embedder_model_custom),
str(args.upscale_audio),
)
elif args.mode == "batch_infer":
run_batch_infer_script(
str(args.f0up_key),
str(args.filter_radius),
str(args.index_rate),
str(args.rms_mix_rate),
str(args.protect),
str(args.hop_length),
str(args.f0method),
str(args.input_folder),
str(args.output_folder),
str(args.pth_path),
str(args.index_path),
str(args.split_audio),
str(args.f0autotune),
str(args.clean_audio),
str(args.clean_strength),
str(args.export_format),
str(args.embedder_model),
str(args.embedder_model_custom),
str(args.upscale_audio),
)
elif args.mode == "tts":
run_tts_script(
str(args.tts_text),
str(args.tts_voice),
str(args.tts_rate),
str(args.f0up_key),
str(args.filter_radius),
str(args.index_rate),
str(args.rms_mix_rate),
str(args.protect),
str(args.hop_length),
str(args.f0method),
str(args.output_tts_path),
str(args.output_rvc_path),
str(args.pth_path),
str(args.index_path),
str(args.split_audio),
str(args.f0autotune),
str(args.clean_audio),
str(args.clean_strength),
str(args.export_format),
str(args.embedder_model),
str(args.embedder_model_custom),
str(args.upscale_audio),
)
elif args.mode == "preprocess":
run_preprocess_script(
str(args.model_name),
str(args.dataset_path),
str(args.sampling_rate),
)
elif args.mode == "extract":
run_extract_script(
str(args.model_name),
str(args.rvc_version),
str(args.f0method),
str(args.hop_length),
str(args.sampling_rate),
str(args.embedder_model),
str(args.embedder_model_custom),
)
elif args.mode == "train":
run_train_script(
str(args.model_name),
str(args.rvc_version),
str(args.save_every_epoch),
str(args.save_only_latest),
str(args.save_every_weights),
str(args.total_epoch),
str(args.sampling_rate),
str(args.batch_size),
str(args.gpu),
str(args.pitch_guidance),
str(args.overtraining_detector),
str(args.overtraining_threshold),
str(args.pretrained),
str(args.custom_pretrained),
str(args.sync_graph),
str(args.g_pretrained_path),
str(args.d_pretrained_path),
)
elif args.mode == "index":
run_index_script(
str(args.model_name),
str(args.rvc_version),
)
elif args.mode == "model_extract":
run_model_extract_script(
str(args.pth_path),
str(args.model_name),
str(args.sampling_rate),
str(args.pitch_guidance),
str(args.rvc_version),
str(args.epoch),
str(args.step),
)
elif args.mode == "model_information":
run_model_information_script(
str(args.pth_path),
)
elif args.mode == "model_blender":
run_model_blender_script(
str(args.model_name),
str(args.pth_path_1),
str(args.pth_path_2),
str(args.ratio),
)
elif args.mode == "tensorboard":
run_tensorboard_script()
elif args.mode == "download":
run_download_script(
str(args.model_link),
)
elif args.mode == "prerequisites":
run_prerequisites_script(
str(args.pretraineds_v1),
str(args.pretraineds_v2),
str(args.models),
str(args.exe),
)
elif args.mode == "audio_analyzer":
run_audio_analyzer_script(
str(args.input_path),
)
elif args.mode == "api":
run_api_script(
str(args.host),
str(args.port),
)
except Exception as error:
print(f"Error: {error}")
if __name__ == "__main__":
main()