Applio / tabs /inference /inference.py
Aitron Emper
Upload inference.py
037701f verified
raw
history blame
15.2 kB
import os, sys
import gradio as gr
import regex as re
import shutil
import datetime
import random
from core import (
run_infer_script,
run_batch_infer_script,
)
from assets.i18n.i18n import I18nAuto
from rvc.lib.utils import format_title
# I18nAuto instance; it is called as i18n("...") around the UI labels in this file.
i18n = I18nAuto()

# The current working directory at import time is used as the project root.
# NOTE(review): it is appended to sys.path only after the project imports at
# the top of the file have already run, so it cannot help resolve those.
now_dir = os.getcwd()
sys.path.append(now_dir)

# Absolute paths of the model folder ("logs") and the audio folder
# ("assets/audios") under the project root.
model_root = os.path.join(now_dir, "logs")
audio_root = os.path.join(now_dir, "assets", "audios")

# The same two folders expressed relative to now_dir; these relative forms are
# what the directory scans in this file pass to os.walk / os.listdir.
model_root_relative = os.path.relpath(model_root, now_dir)
audio_root_relative = os.path.relpath(audio_root, now_dir)
# File-name suffixes accepted as audio by the scans in this file. The values
# carry no leading dot; they are passed to str.endswith() as a tuple.
sup_audioext = {
    "aac",
    "ac3",
    "aiff",
    "alac",
    "flac",
    "m4a",
    "mp3",
    "mp4",
    "ogg",
    "opus",
    "wav",
    "webm",
    "wma",
}
# Import-time scan of the model folder: every .pth/.onnx file found anywhere
# below it, except files whose names start with "G_" or "D_".
names = [
    os.path.join(folder, model_name)
    for folder, _, model_names in os.walk(model_root_relative, topdown=False)
    for model_name in model_names
    if model_name.endswith((".pth", ".onnx"))
    and not model_name.startswith(("G_", "D_"))
]

# Import-time scan of the model folder for .index files; names containing
# "trained" are skipped.
indexes_list = [
    os.path.join(folder, index_name)
    for folder, _, index_names in os.walk(model_root_relative, topdown=False)
    for index_name in index_names
    if "trained" not in index_name and index_name.endswith(".index")
]

# Import-time scan of the audio folder. Only files sitting directly in the
# folder itself are kept, and files with "_output" in their name are skipped.
audio_paths = [
    os.path.join(folder, audio_name)
    for folder, _, audio_names in os.walk(audio_root_relative, topdown=False)
    if folder == audio_root_relative
    for audio_name in audio_names
    if "_output" not in audio_name and audio_name.endswith(tuple(sup_audioext))
]
def output_path_fn(input_audio_path):
    """Return the default output path for a given input audio path.

    The result lives in the same directory as the input. Its file name is the
    input's base name with everything from the last "." onwards removed (the
    whole name is kept when there is no "."), followed by "_output.wav".
    """
    folder, file_name = os.path.split(input_audio_path)
    last_dot = file_name.rfind(".")
    stem = file_name if last_dot == -1 else file_name[:last_dot]
    return os.path.join(folder, stem + "_output.wav")
def change_choices():
    """Rescan the model and audio folders and build dropdown refresh payloads.

    Returns:
        A 3-tuple of update dicts (``{"choices": [...], "__type__": "update"}``)
        holding, in order, the sorted voice model paths, the sorted index file
        paths and the sorted audio file paths.
    """
    model_paths = []
    index_paths = []
    # A single pass over the model tree feeds both the model and index lists.
    for root, _, files in os.walk(model_root_relative, topdown=False):
        for file_name in files:
            if file_name.endswith((".pth", ".onnx")):
                # Names starting with "G_" or "D_" are not offered as models.
                if not file_name.startswith(("G_", "D_")):
                    model_paths.append(os.path.join(root, file_name))
            elif file_name.endswith(".index") and "trained" not in file_name:
                index_paths.append(os.path.join(root, file_name))

    # Only files directly inside the audio folder are offered, so just the
    # first (top-level) entry of the walk is needed. A missing folder yields
    # no entry at all, hence the empty default.
    _, _, top_level_files = next(os.walk(audio_root_relative), (None, [], []))
    audio_suffixes = tuple(sup_audioext)
    audio_files = [
        os.path.join(audio_root_relative, file_name)
        for file_name in top_level_files
        if file_name.endswith(audio_suffixes) and "_output" not in file_name
    ]

    return (
        {"choices": sorted(model_paths), "__type__": "update"},
        {"choices": sorted(index_paths), "__type__": "update"},
        {"choices": sorted(audio_files), "__type__": "update"},
    )
def get_indexes():
    """Collect the paths of all usable .index files below the model folder.

    Files whose names contain "trained" are skipped.

    Returns:
        A list of paths, or the empty string ``""`` when nothing was found.
    """
    found = []
    for folder, _, entries in os.walk(model_root_relative):
        for entry in entries:
            if not entry.endswith(".index") or "trained" in entry:
                continue
            found.append(os.path.join(folder, entry))
    return found or ""
def match_index(model_file: str) -> str:
    """Pick the index file that best matches a voice model path.

    The model's file name (without a trailing ".pth"/".onnx") is used, and if
    it ends in ``_e<digits>_s<digits>`` that suffix is dropped to get a base
    name. Index files are then looked up in ``<model root>/<base name>`` (when
    that is a directory) and in the model root itself. A file qualifies when it
    ends in ".index", does not contain "trained", is reported by
    ``get_indexes()``, and either sits in the per-model directory or has the
    model/base name (case-insensitively) in its file name.

    Args:
        model_file: Path of the selected model; may be empty or None.

    Returns:
        The best matching index path (names without spaces are preferred, then
        the largest file), or ``""`` when there is no match.
    """
    if not model_file:
        return ""

    # Strip the extension only when it is at the very end of the path.
    model_path_no_ext = re.sub(r"\.(?:pth|onnx)$", "", model_file)
    # Keep only the file name, not the directory.
    model_file_name = os.path.split(model_path_no_ext)[-1]

    # Names ending in _eXXX_sXXX map to a base name without that suffix.
    if re.match(r".+_e\d+_s\d+$", model_file_name):
        base_model_name = model_file_name.rsplit("_", 2)[0]
    else:
        base_model_name = model_file_name

    sid_directory = os.path.join(model_root_relative, base_model_name)
    # Only existing directories can be listed; anything else is skipped.
    directories_to_search = [
        directory
        for directory in (sid_directory, model_root_relative)
        if os.path.isdir(directory)
    ]

    # Scan the index tree once instead of once per candidate file.
    # get_indexes() returns "" when empty, which becomes an empty set here.
    known_indexes = set(get_indexes())
    wanted_names = (model_file_name.lower(), base_model_name.lower())

    candidates = []
    for directory in directories_to_search:
        # Anything inside the per-model directory counts as a match.
        folder_match = directory == sid_directory
        for filename in os.listdir(directory):
            if not filename.endswith(".index") or "trained" in filename:
                continue
            lowered = filename.lower()
            name_match = any(name in lowered for name in wanted_names)
            if not (name_match or folder_match):
                continue
            index_path = os.path.join(directory, filename)
            if index_path in known_indexes:
                # Sort key: names without spaces first, then largest size.
                candidates.append(
                    (" " in filename, -os.path.getsize(index_path), index_path)
                )

    if not candidates:
        return ""
    # min() returns the first best entry, like a stable sort followed by [0].
    return min(candidates, key=lambda candidate: candidate[:2])[2]
def save_to_wav(record_button):
    """Move a finished recording into the audio folder under a timestamped name.

    Args:
        record_button: File path of the recorded audio, or None when there is
            no recording.

    Returns:
        ``(target_path, default_output_path)`` for the two bound output
        components. When ``record_button`` is None, two empty update dicts are
        returned so that both components are left unchanged.
    """
    if record_button is None:
        # Nothing to store: hand back explicit no-op updates, one per output.
        return {"__type__": "update"}, {"__type__": "update"}

    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
    target_path = os.path.join(audio_root_relative, new_name)

    # The audio folder may not exist yet; the move would fail without it.
    os.makedirs(audio_root_relative, exist_ok=True)
    shutil.move(record_button, target_path)
    return target_path, output_path_fn(target_path)
def save_to_wav2(upload_audio):
    """Copy an uploaded audio file into the audio folder.

    The stored file name is ``format_title`` applied to the upload's base
    name. An existing file of that name is replaced.

    Args:
        upload_audio: File path of the uploaded audio, or None when there is
            no upload.

    Returns:
        ``(target_path, default_output_path)`` for the two bound output
        components. When ``upload_audio`` is None, two empty update dicts are
        returned so that both components are left unchanged.
    """
    if upload_audio is None:
        # Nothing to store: hand back explicit no-op updates, one per output.
        return {"__type__": "update"}, {"__type__": "update"}

    formated_name = format_title(os.path.basename(upload_audio))
    target_path = os.path.join(audio_root_relative, formated_name)

    if os.path.exists(target_path):
        if os.path.samefile(upload_audio, target_path):
            # Source and target are the same file: removing the target first
            # would delete the only copy, so there is nothing to do.
            return target_path, output_path_fn(target_path)
        os.remove(target_path)

    # The audio folder may not exist yet; the copy would fail without it.
    os.makedirs(audio_root_relative, exist_ok=True)
    shutil.copy(upload_audio, target_path)
    return target_path, output_path_fn(target_path)
def delete_outputs():
    """Delete generated output files from the audio folder tree.

    Every file below the audio folder whose name ends with one of the
    supported audio suffixes and contains "_output" is removed. A Gradio info
    notification is raised afterwards.
    """
    audio_suffixes = tuple(sup_audioext)  # built once, not once per file
    for root, _, files in os.walk(audio_root_relative, topdown=False):
        for name in files:
            if name.endswith(audio_suffixes) and "_output" in name:
                os.remove(os.path.join(root, name))
    gr.Info("Outputs cleared!")
# Inference tab
def inference_tab():
    """Build the inference tab: model/index pickers plus Single and Batch sub-tabs.

    Creates the Gradio components and wires their events. Single conversions
    call ``run_infer_script`` and batch conversions call
    ``run_batch_infer_script``. Each sub-tab has its own "Clear Outputs"
    button, and both are bound to ``delete_outputs``.

    NOTE(review): presumably this must be called inside an active
    ``gr.Blocks`` context owned by the caller; confirm against the caller.
    NOTE(review): the container nesting below was reconstructed from the
    statement order; verify the layout against the rendered UI.
    """
    default_weight = random.choice(names) if names else None
    with gr.Row():
        with gr.Row():
            model_file = gr.Dropdown(
                label=i18n("Voice Model"),
                choices=sorted(names, key=os.path.getsize),
                interactive=True,
                value=default_weight,
                allow_custom_value=True,
            )
            index_file = gr.Dropdown(
                label=i18n("Index File"),
                choices=get_indexes(),
                value=match_index(default_weight) if default_weight else "",
                interactive=True,
                allow_custom_value=True,
            )
        with gr.Column():
            refresh_button = gr.Button(i18n("Refresh"))
            unload_button = gr.Button(i18n("Unload Voice"))

            # "Unload" simply clears the model dropdown's value.
            unload_button.click(
                fn=lambda: ({"value": "", "__type__": "update"}),
                inputs=[],
                outputs=[model_file],
            )

            # Selecting a model auto-selects the best matching index file.
            model_file.select(
                fn=match_index,
                inputs=[model_file],
                outputs=[index_file],
            )

    # Single inference tab
    with gr.Tab(i18n("Single")):
        with gr.Row():
            with gr.Column():
                upload_audio = gr.Audio(
                    label=i18n("Upload Audio"), type="filepath", editable=False
                )
                with gr.Row():
                    audio = gr.Dropdown(
                        label=i18n("Select Audio"),
                        choices=sorted(audio_paths),
                        value=audio_paths[0] if audio_paths else "",
                        interactive=True,
                        allow_custom_value=True,
                    )

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Column():
                # Named separately from the Batch tab's button so that both
                # can be bound to delete_outputs further down.
                clear_outputs_infer = gr.Button(
                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
                )
                output_path = gr.Textbox(
                    label=i18n("Output Path"),
                    placeholder=i18n("Enter output path"),
                    value=(
                        output_path_fn(audio_paths[0])
                        if audio_paths
                        else os.path.join(now_dir, "assets", "audios", "output.wav")
                    ),
                    interactive=True,
                )
                split_audio = gr.Checkbox(
                    label=i18n("Split Audio"),
                    visible=True,
                    value=False,
                    interactive=True,
                )
                pitch = gr.Slider(
                    minimum=-24,
                    maximum=24,
                    step=1,
                    label=i18n("Pitch"),
                    value=0,
                    interactive=True,
                )
                filter_radius = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label=i18n(
                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                    ),
                    value=3,
                    step=1,
                    interactive=True,
                )
                index_rate = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Search Feature Ratio"),
                    value=0.75,
                    interactive=True,
                )
                hop_length = gr.Slider(
                    minimum=1,
                    maximum=512,
                    step=1,
                    label=i18n("Hop Length"),
                    value=128,
                    interactive=True,
                )
            with gr.Column():
                f0method = gr.Radio(
                    label=i18n("Pitch extraction algorithm"),
                    choices=[
                        "pm",
                        "harvest",
                        "dio",
                        "crepe",
                        "crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    interactive=True,
                )

        convert_button1 = gr.Button(i18n("Convert"))

        with gr.Row():  # Defines output info + output audio download after conversion
            vc_output1 = gr.Textbox(label=i18n("Output Information"))
            vc_output2 = gr.Audio(label=i18n("Export Audio"))

    # Batch inference tab
    with gr.Tab(i18n("Batch")):
        with gr.Row():
            with gr.Column():
                input_folder_batch = gr.Textbox(
                    label=i18n("Input Folder"),
                    placeholder=i18n("Enter input path"),
                    value=os.path.join(now_dir, "assets", "audios"),
                    interactive=True,
                )
                output_folder_batch = gr.Textbox(
                    label=i18n("Output Folder"),
                    placeholder=i18n("Enter output path"),
                    value=os.path.join(now_dir, "assets", "audios"),
                    interactive=True,
                )

        with gr.Accordion(i18n("Advanced Settings"), open=False):
            with gr.Column():
                clear_outputs_batch = gr.Button(
                    i18n("Clear Outputs (Deletes all audios in assets/audios)")
                )
                split_audio_batch = gr.Checkbox(
                    label=i18n("Split Audio"),
                    visible=True,
                    value=False,
                    interactive=True,
                )
                pitch_batch = gr.Slider(
                    minimum=-24,
                    maximum=24,
                    step=1,
                    label=i18n("Pitch"),
                    value=0,
                    interactive=True,
                )
                filter_radius_batch = gr.Slider(
                    minimum=0,
                    maximum=7,
                    label=i18n(
                        "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
                    ),
                    value=3,
                    step=1,
                    interactive=True,
                )
                index_rate_batch = gr.Slider(
                    minimum=0,
                    maximum=1,
                    label=i18n("Search Feature Ratio"),
                    value=0.75,
                    interactive=True,
                )
                hop_length_batch = gr.Slider(
                    minimum=1,
                    maximum=512,
                    step=1,
                    label=i18n("Hop Length"),
                    value=128,
                    interactive=True,
                )
            with gr.Column():
                f0method_batch = gr.Radio(
                    label=i18n("Pitch extraction algorithm"),
                    choices=[
                        "pm",
                        "harvest",
                        "dio",
                        "crepe",
                        "crepe-tiny",
                        "rmvpe",
                    ],
                    value="rmvpe",
                    interactive=True,
                )

        convert_button2 = gr.Button(i18n("Convert"))

        with gr.Row():  # Defines output info + output audio download after conversion
            vc_output3 = gr.Textbox(label=i18n("Output Information"))

    # Helper returning a visibility update; not bound to any event in this function.
    def toggle_visible(checkbox):
        return {"visible": checkbox, "__type__": "update"}

    # Refresh re-scans the folders and repopulates the three dropdowns.
    refresh_button.click(
        fn=change_choices,
        inputs=[],
        outputs=[model_file, index_file, audio],
    )
    # Picking another input audio recomputes the default output path.
    audio.change(
        fn=output_path_fn,
        inputs=[audio],
        outputs=[output_path],
    )
    upload_audio.upload(
        fn=save_to_wav2,
        inputs=[upload_audio],
        outputs=[audio, output_path],
    )
    upload_audio.stop_recording(
        fn=save_to_wav,
        inputs=[upload_audio],
        outputs=[audio, output_path],
    )
    # Bind the clear button of each sub-tab.
    clear_outputs_infer.click(
        fn=delete_outputs,
        inputs=[],
        outputs=[],
    )
    clear_outputs_batch.click(
        fn=delete_outputs,
        inputs=[],
        outputs=[],
    )
    convert_button1.click(
        fn=run_infer_script,
        inputs=[
            pitch,
            filter_radius,
            index_rate,
            hop_length,
            f0method,
            audio,
            output_path,
            model_file,
            index_file,
            split_audio,
        ],
        outputs=[vc_output1, vc_output2],
    )
    convert_button2.click(
        fn=run_batch_infer_script,
        inputs=[
            pitch_batch,
            filter_radius_batch,
            index_rate_batch,
            hop_length_batch,
            f0method_batch,
            input_folder_batch,
            output_folder_batch,
            model_file,
            index_file,
            split_audio_batch,
        ],
        outputs=[vc_output3],
    )