Spaces:

IAHispano
/

Applio

Running

App Files Files Community

Applio / tabs /tts /tts.py

aitronz

initial

85d3b29 11 months ago

raw

history blame

10.2 kB

	import os, sys
	import gradio as gr
	import regex as re
	import json
	import shutil
	import datetime
	import random

	from core import (
	run_tts_script,
	)

	from assets.i18n.i18n import I18nAuto

	# Shared translator instance; the UI code in this module wraps its labels in i18n(...).
	i18n = I18nAuto()

	# Resolve the working directory once and make it importable.
	now_dir = os.getcwd()
	sys.path.append(now_dir)

	# Absolute locations of the model tree and of the audio library.
	model_root = os.path.join(now_dir, "logs")
	audio_root = os.path.join(now_dir, "assets", "audios")

	# The same two locations expressed relative to the working directory.
	model_root_relative = os.path.relpath(model_root, now_dir)
	audio_root_relative = os.path.relpath(audio_root, now_dir)

	# File-name endings (no leading dot) that count as audio files.
	sup_audioext = {
	    "wav", "mp3", "flac", "ogg", "opus", "m4a", "mp4",
	    "aac", "alac", "wma", "aiff", "webm", "ac3",
	}

	# Voice model weights found at import time; G_/D_ prefixed files are left out.
	names = [
	    os.path.join(folder, entry)
	    for folder, _, entries in os.walk(model_root_relative, topdown=False)
	    for entry in entries
	    if entry.endswith((".pth", ".onnx")) and not entry.startswith(("G_", "D_"))
	]

	# Index files found at import time, skipping any whose name contains "trained".
	indexes_list = [
	    os.path.join(folder, entry)
	    for folder, _, entries in os.walk(model_root_relative, topdown=False)
	    for entry in entries
	    if "trained" not in entry and entry.endswith(".index")
	]

	# Audio files sitting directly in the audio folder, excluding "_output" files.
	audio_paths = [
	    os.path.join(folder, entry)
	    for folder, _, entries in os.walk(audio_root_relative, topdown=False)
	    if folder == audio_root_relative
	    for entry in entries
	    if "_output" not in entry and entry.endswith(tuple(sup_audioext))
	]


	def change_choices():
	    """Rescan the model and audio folders and build dropdown refresh payloads.

	    Returns:
	        A 3-tuple of update dicts of the form
	        ``{"choices": [...], "__type__": "update"}`` holding, in order, the
	        sorted voice-model paths, the sorted index-file paths and the sorted
	        input-audio paths.
	    """
	    model_paths = []
	    index_paths = []
	    # A single pass over the model tree collects both weights and index files.
	    for folder, _, entries in os.walk(model_root_relative, topdown=False):
	        for entry in entries:
	            if entry.endswith((".pth", ".onnx")):
	                # G_/D_ prefixed files are not offered as voice models.
	                if not entry.startswith(("G_", "D_")):
	                    model_paths.append(os.path.join(folder, entry))
	            elif entry.endswith(".index") and "trained" not in entry:
	                index_paths.append(os.path.join(folder, entry))

	    # Only files directly inside the audio folder are listed, so the first
	    # (top-down) walk entry is all that is needed; a missing folder yields
	    # an empty list.
	    _, _, top_level_files = next(os.walk(audio_root_relative), (None, [], []))
	    audio_suffixes = tuple(sup_audioext)
	    audio_files = [
	        os.path.join(audio_root_relative, entry)
	        for entry in top_level_files
	        if entry.endswith(audio_suffixes) and "_output" not in entry
	    ]

	    return (
	        {"choices": sorted(model_paths), "__type__": "update"},
	        {"choices": sorted(index_paths), "__type__": "update"},
	        {"choices": sorted(audio_files), "__type__": "update"},
	    )


	def get_indexes():
	    """List the index files currently present under the model folder.

	    Files whose name contains "trained" are skipped.

	    Returns:
	        A list of index-file paths, or an empty string when none exist.
	    """
	    found = [
	        os.path.join(dirpath, filename)
	        for dirpath, _, filenames in os.walk(model_root_relative)
	        for filename in filenames
	        if filename.endswith(".index") and "trained" not in filename
	    ]
	    # Callers receive "" rather than an empty list when nothing was found.
	    return found or ""


	def match_index(model_file: str) -> str:
	    """Pick the index file that most likely belongs to a voice model.

	    The model's folder under the model root (named after the model, with any
	    trailing ``_e<digits>_s<digits>`` suffix removed) is searched first; every
	    index file in it is a candidate. The model root itself is searched next,
	    where only index files whose name contains the model name are candidates.
	    Index files with "trained" in their name are never considered.

	    Args:
	        model_file: Path of the selected ``.pth``/``.onnx`` model; may be empty.

	    Returns:
	        Path of the best candidate (names without spaces win, then the largest
	        file), or an empty string when there is no model or no candidate.
	    """
	    if not model_file:
	        return ""

	    # Drop the weight extension, then keep only the file name.
	    model_file_name = os.path.basename(re.sub(r"\.(?:pth|onnx)$", "", model_file))
	    if not model_file_name:
	        return ""

	    # Names ending in _eXXX_sXXX map back to the name before that suffix.
	    if re.match(r".+_e\d+_s\d+$", model_file_name):
	        base_model_name = model_file_name.rsplit("_", 2)[0]
	    else:
	        base_model_name = model_file_name

	    sid_directory = os.path.join(model_root_relative, base_model_name)
	    wanted_names = {model_file_name.lower(), base_model_name.lower()}

	    candidates = []
	    for directory in (sid_directory, model_root_relative):
	        # Either folder may be absent; skip it instead of failing in listdir.
	        if not os.path.isdir(directory):
	            continue
	        # Anything inside the model's own folder counts as a match.
	        folder_match = directory == sid_directory
	        for filename in os.listdir(directory):
	            if not filename.endswith(".index") or "trained" in filename:
	                continue
	            lowered = filename.lower()
	            if folder_match or any(name in lowered for name in wanted_names):
	                index_path = os.path.join(directory, filename)
	                candidates.append(
	                    (" " in filename, -os.path.getsize(index_path), index_path)
	                )

	    if not candidates:
	        return ""

	    # Prefer names without spaces, then the largest file; the earliest
	    # candidate wins ties.
	    return min(candidates, key=lambda item: item[:2])[2]


	def save_to_wav(record_button):
	    """Move a recorded file into the audio folder under a timestamped name.

	    Args:
	        record_button: Path of the file to move, or None when there is none.

	    Returns:
	        Path of the moved file (``<audio folder>/<YYYY-mm-dd_HH-MM-SS>.wav``),
	        or None when ``record_button`` is None.
	    """
	    if record_button is None:
	        return None

	    new_name = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S") + ".wav"
	    target_path = os.path.join(audio_root_relative, new_name)

	    # The audio folder may not exist yet; moving into a missing folder fails.
	    os.makedirs(audio_root_relative, exist_ok=True)
	    shutil.move(record_button, target_path)
	    return target_path


	def save_to_wav2(upload_audio):
	    """Copy an uploaded file into the audio folder, replacing any same-named file.

	    Args:
	        upload_audio: Path of the file to copy, or None when there is none.

	    Returns:
	        Path of the copy inside the audio folder, or None when
	        ``upload_audio`` is None.
	    """
	    if upload_audio is None:
	        return None

	    target_path = os.path.join(audio_root_relative, os.path.basename(upload_audio))

	    if os.path.exists(target_path):
	        # When the upload already is the stored file, removing the target
	        # would delete the only copy before it could be copied back.
	        if os.path.samefile(upload_audio, target_path):
	            return target_path
	        os.remove(target_path)

	    os.makedirs(audio_root_relative, exist_ok=True)
	    shutil.copy(upload_audio, target_path)
	    return target_path


	def delete_outputs():
	    """Delete generated "_output" audio files under the audio folder.

	    Every file below the audio folder (subfolders included) whose name
	    contains "_output" and ends with a supported audio extension is removed,
	    then an informational message is raised through ``gr.Info``.
	    """
	    audio_suffixes = tuple(sup_audioext)
	    for root, _, files in os.walk(audio_root_relative, topdown=False):
	        for name in files:
	            if "_output" in name and name.endswith(audio_suffixes):
	                os.remove(os.path.join(root, name))
	    gr.Info("Outputs cleared!")


	def tts_tab():
	    """Build the text-to-speech tab and wire its events.

	    Creates the voice-model and index dropdowns, the TTS voice and text
	    inputs, the advanced conversion settings, and the Convert button, whose
	    click passes all of these values to ``run_tts_script``.

	    NOTE(review): presumably called inside an active ``gr.Blocks`` context,
	    since components are created without an explicit parent - confirm
	    against the caller.
	    """
	    default_weight = random.choice(names) if names else ""
	    with gr.Row():
	        with gr.Row():
	            model_file = gr.Dropdown(
	                label=i18n("Voice Model"),
	                choices=sorted(names, key=lambda path: os.path.getsize(path)),
	                interactive=True,
	                value=default_weight,
	                allow_custom_value=True,
	            )
	            # Preselect the index that goes with the initially chosen model.
	            best_default_index_path = match_index(model_file.value)
	            index_file = gr.Dropdown(
	                label=i18n("Index File"),
	                choices=get_indexes(),
	                value=best_default_index_path,
	                interactive=True,
	                allow_custom_value=True,
	            )
	        with gr.Column():
	            refresh_button = gr.Button(i18n("Refresh"))
	            unload_button = gr.Button(i18n("Unload Voice"))

	            # Unloading simply clears the model selection.
	            unload_button.click(
	                fn=lambda: ({"value": "", "__type__": "update"}),
	                inputs=[],
	                outputs=[model_file],
	            )

	            # Selecting a model picks its matching index file.
	            model_file.select(
	                fn=match_index,
	                inputs=[model_file],
	                outputs=[index_file],
	            )

	    # The selectable TTS voices come from a JSON list shipped with the
	    # project; read it as UTF-8 regardless of the platform default encoding.
	    json_path = os.path.join("rvc", "lib", "tools", "tts_voices.json")
	    with open(json_path, "r", encoding="utf-8") as file:
	        tts_voices_data = json.load(file)

	    short_names = [voice.get("ShortName", "") for voice in tts_voices_data]

	    tts_voice = gr.Dropdown(
	        label=i18n("TTS Voices"),
	        choices=short_names,
	        interactive=True,
	        value=None,
	    )

	    tts_text = gr.Textbox(
	        label=i18n("Text to Synthesize"),
	        placeholder=i18n("Enter text to synthesize"),
	        lines=3,
	    )

	    with gr.Accordion(i18n("Advanced Settings"), open=False):
	        with gr.Column():
	            output_tts_path = gr.Textbox(
	                label=i18n("Output Path for TTS Audio"),
	                placeholder=i18n("Enter output path"),
	                value=os.path.join(now_dir, "assets", "audios", "tts_output.wav"),
	                interactive=True,
	            )

	            output_rvc_path = gr.Textbox(
	                label=i18n("Output Path for RVC Audio"),
	                placeholder=i18n("Enter output path"),
	                value=os.path.join(now_dir, "assets", "audios", "tts_rvc_output.wav"),
	                interactive=True,
	            )

	            pitch = gr.Slider(
	                minimum=-24,
	                maximum=24,
	                step=1,
	                label=i18n("Pitch"),
	                value=0,
	                interactive=True,
	            )
	            filter_radius = gr.Slider(
	                minimum=0,
	                maximum=7,
	                label=i18n(
	                    "If >=3: apply median filtering to the harvested pitch results. The value represents the filter radius and can reduce breathiness"
	                ),
	                value=3,
	                step=1,
	                interactive=True,
	            )
	            index_rate = gr.Slider(
	                minimum=0,
	                maximum=1,
	                label=i18n("Search Feature Ratio"),
	                value=0.75,
	                interactive=True,
	            )
	            hop_length = gr.Slider(
	                minimum=1,
	                maximum=512,
	                step=1,
	                label=i18n("Hop Length"),
	                value=128,
	                interactive=True,
	            )
	        with gr.Column():
	            f0method = gr.Radio(
	                label=i18n("Pitch extraction algorithm"),
	                choices=[
	                    "pm",
	                    "harvest",
	                    "dio",
	                    "crepe",
	                    "crepe-tiny",
	                    "rmvpe",
	                ],
	                value="rmvpe",
	                interactive=True,
	            )

	    convert_button1 = gr.Button(i18n("Convert"))

	    with gr.Row():  # Defines output info + output audio download after conversion
	        vc_output1 = gr.Textbox(label=i18n("Output Information"))
	        vc_output2 = gr.Audio(label=i18n("Export Audio"))

	    # change_choices() returns three updates (models, indexes, audio files),
	    # but this tab only has the first two dropdowns, so the audio update is
	    # dropped to keep return values and outputs in step.
	    refresh_button.click(
	        fn=lambda: change_choices()[:2],
	        inputs=[],
	        outputs=[model_file, index_file],
	    )
	    convert_button1.click(
	        fn=run_tts_script,
	        inputs=[
	            tts_text,
	            tts_voice,
	            pitch,
	            filter_radius,
	            index_rate,
	            hop_length,
	            f0method,
	            output_tts_path,
	            output_rvc_path,
	            model_file,
	            index_file,
	        ],
	        outputs=[vc_output1, vc_output2],
	    )