hex-rvc / module.py
Hev832's picture
Update module.py
af2ee36 verified
import os
import re
import random
from scipy.io.wavfile import write
from scipy.io.wavfile import read
import numpy as np
import gradio as gr
import yt_dlp
import subprocess
from pydub import AudioSegment
from audio_separator.separator import Separator
from lib.infer import infer_audio
import edge_tts
import tempfile
import anyio
from pathlib import Path
from lib.language_tts import language_dict
import shutil
import time
from argparse import ArgumentParser
from download_model import download_online_model
# Project root: the current working directory at import time, made absolute
# (Path() with no arguments is the relative path ".").
main_dir = Path().resolve()
print(main_dir)
# main_dir was derived from the working directory on the line above.
os.chdir(main_dir)
# Directory whose sub-folders get_folders() lists as selectable models.
models_dir = main_dir / "rvc_models"
# NOTE(review): points at the same location as AUDIO_DIR and is not referenced
# by the code visible in this file -- confirm before removing.
audio_separat_dir = main_dir / "audio_input"
# Directory that get_audio_files() scans (and creates when missing).
AUDIO_DIR = main_dir / 'audio_input'
# Collect the name of every sub-directory found in the models directory
def get_folders():
    """Return the names of the directories located directly in ``models_dir``.

    Returns:
        A list of directory names; empty when ``models_dir`` does not exist
        or is not a directory.
    """
    if not (models_dir.exists() and models_dir.is_dir()):
        return []

    folder_names = []
    for entry in models_dir.iterdir():
        # Plain files sitting next to the model folders are skipped.
        if entry.is_dir():
            folder_names.append(entry.name)
    return folder_names
# Function to refresh and return the list of folders
def refresh_folders():
    """Return a Gradio update that reloads the model dropdown's choices.

    Returns:
        The update object produced by ``gr.update`` carrying the current
        result of ``get_folders()`` as the new ``choices``.
    """
    # gr.update() is the component-agnostic update helper and is available in
    # both Gradio 3.x and 4.x; the per-component classmethod
    # gr.Dropdown.update() used previously was removed in Gradio 4.
    return gr.update(choices=get_folders())
# Function to get the list of audio files in the specified directory
def get_audio_files():
    """Return the file names in ``AUDIO_DIR`` that carry an audio extension.

    ``AUDIO_DIR`` is created first when it does not exist yet, so a fresh
    checkout yields an empty list instead of an error.

    Returns:
        A list of bare file names (no directory part) whose lower-cased name
        ends with .mp3, .wav, .flac, .ogg or .aac.
    """
    audio_suffixes = ('.mp3', '.wav', '.flac', '.ogg', '.aac')
    # exist_ok=True replaces the separate "exists?" check, which could race
    # with another process/request creating the directory in between.
    os.makedirs(AUDIO_DIR, exist_ok=True)
    return [
        name
        for name in os.listdir(AUDIO_DIR)
        if name.lower().endswith(audio_suffixes)
    ]
# Build the playable path of every audio file found in the audio directory
def load_audio_files():
    """Return the audio files from ``get_audio_files()`` joined onto ``AUDIO_DIR``.

    Returns:
        A list with one path per audio file name, each produced by
        ``os.path.join(AUDIO_DIR, name)``.
    """
    playable_paths = []
    for file_name in get_audio_files():
        playable_paths.append(os.path.join(AUDIO_DIR, file_name))
    return playable_paths
def refresh_audio_list():
    """Return a Gradio update that reloads the audio-file dropdown's choices.

    Returns:
        The update object produced by ``gr.update`` carrying the current
        result of ``load_audio_files()`` as the new ``choices``.
    """
    audio_files = load_audio_files()
    # gr.update() works on both Gradio 3.x and 4.x, whereas the per-component
    # classmethod gr.Dropdown.update() was removed in Gradio 4.
    return gr.update(choices=audio_files)
def download_audio(url):
    """Download the audio behind ``url`` with yt-dlp and return a ``.wav`` path.

    The download is written under ``ytdl/`` using the media title as file
    name, and an ``FFmpegExtractAudio`` post-processor is requested with
    ``wav`` as the preferred codec.

    Args:
        url: Address handed to ``YoutubeDL.extract_info``.

    Returns:
        The file name yt-dlp prepared for the download, with everything after
        its last ``.`` replaced by ``wav``.
    """
    wav_extraction = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [wav_extraction],
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        media_info = downloader.extract_info(url, download=True)
        downloaded_name = downloader.prepare_filename(media_info)

    # The prepared name still carries the source container's extension, so
    # swap it for the one requested from the post-processor.
    stem = downloaded_name.rsplit('.', 1)[0]
    return stem + '.wav'
async def text_to_speech_edge(text, language_code):
    """Synthesize ``text`` with edge-tts and return the path of the audio file.

    Args:
        text: The text to speak.
        language_code: Key looked up in ``language_dict`` to pick the voice.

    Returns:
        Path of a temporary ``.mp3`` file holding the synthesized speech.
        The file is created with ``delete=False`` and is not removed here.
    """
    # NOTE(review): "default_voice" is handed to edge-tts unchanged when the
    # key is unknown; it does not look like a real voice name -- confirm the
    # intended fallback.
    voice = language_dict.get(language_code, "default_voice")
    communicate = edge_tts.Communicate(text, voice)
    # edge-tts writes MP3 data, so the file gets a matching extension instead
    # of the previous ".wav", which mislabelled the content for any consumer
    # that trusts the suffix.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name
    # Our own handle is closed before edge-tts opens the path for writing.
    await communicate.save(tmp_path)
    return tmp_path
if __name__ == '__main__':
    # Command-line switches that control how the Gradio server is exposed.
    cli_parser = ArgumentParser()
    cli_parser.add_argument("--share", action="store_true", dest="share_enabled", default=False)
    cli_parser.add_argument("--listen", action="store_true", default=False)
    cli_parser.add_argument('--listen-host', type=str)
    cli_parser.add_argument('--listen-port', type=int)
    args = cli_parser.parse_args()

    def _refresh_choices():
        """Return fresh updates for the audio-file and model dropdowns."""
        return refresh_audio_list(), refresh_folders()

    ui_theme = gr.themes.Base(primary_hue="red", secondary_hue="pink")

    # Gradio Interface
    # NOTE(review): the nesting of rows/accordions below follows the order in
    # which the widgets are created -- confirm it matches the intended layout.
    with gr.Blocks(title="Hex RVC", theme=ui_theme) as app:
        gr.Markdown("# Hex RVC")
        gr.Markdown("Join [AIHub](https://discord.gg/aihub) to get the RVC model!")

        with gr.Tab("Inference"):
            with gr.Row():
                MODEL_NAME = gr.Dropdown(
                    label="Select a Model",
                    choices=get_folders(),
                    interactive=True,
                )
                SOUND_PATH = gr.Dropdown(
                    choices=load_audio_files(),
                    label="Select an audio file",
                    interactive=True,
                )
                upload_audio = gr.Audio(label="Upload Audio", type='filepath')

            with gr.Accordion("Hex TTS"):
                input_text = gr.Textbox(lines=5, label="Input Text")
                language = gr.Dropdown(
                    choices=list(language_dict.keys()),
                    label="Choose the Voice Model",
                )
                tts_convert = gr.Button("Convert")
                tts_output = gr.Audio(label="Generated TTS Audio", type='filepath')
                tts_convert.click(
                    fn=text_to_speech_edge,
                    inputs=[input_text, language],
                    outputs=tts_output,
                )

            output_audio = gr.Audio(label="Generated Audio", type='filepath')

            with gr.Row():
                refresh_btn = gr.Button("Refresh")
                # No click handler is attached to this button in this block.
                run_button = gr.Button("Convert")

            refresh_btn.click(_refresh_choices, outputs=[SOUND_PATH, MODEL_NAME])

        with gr.Tab("Download RVC Model"):
            url = gr.Textbox(label="Your Model URL")
            dirname = gr.Textbox(label="Your Model Name")
            download_button = gr.Button("Download Model")
            download_output = gr.Textbox(label="Download Status")
            download_button.click(
                fn=download_online_model,
                inputs=[url, dirname],
                outputs=download_output,
            )

        with gr.Tab("UVR5 (Demo)"):
            input_audio = gr.Audio(type="filepath", label="Upload Audio")
            roformer_link = gr.Textbox(label="Audio Link")
            roformer_download_button = gr.Button("Download")
            # No click handler is attached to this button in this block.
            separate_button = gr.Button("Separate Audio")
            separation_output = gr.Textbox(label="Separation Output Path")
            roformer_download_button.click(
                fn=download_audio,
                inputs=[roformer_link],
                outputs=[input_audio],
            )

    # Without --listen the host is left to Gradio's default; with it, bind to
    # the requested host or to every interface when none was given.
    if args.listen:
        bind_host = args.listen_host or '0.0.0.0'
    else:
        bind_host = None

    app.launch(
        share=args.share_enabled,
        server_name=bind_host,
        server_port=args.listen_port,
    )