"""Hex RVC web UI.

Builds a Gradio interface with three tabs (RVC inference, RVC model
download, and a UVR5 demo) plus the small helpers those tabs rely on:
model/audio discovery, YouTube audio download through yt-dlp, and
text-to-speech through edge-tts.
"""

# NOTE: import order is kept exactly as in the original file, because some of
# these packages may have import-time side effects.
import os
import re
import random
from scipy.io.wavfile import write
from scipy.io.wavfile import read
import numpy as np
import gradio as gr
import yt_dlp
import subprocess
from pydub import AudioSegment
from audio_separator.separator import Separator
from lib.infer import infer_audio
import edge_tts
import tempfile
import anyio
from pathlib import Path
from lib.language_tts import language_dict
import shutil
import time
from argparse import ArgumentParser
from download_model import download_online_model

# Every relative path below (including the yt-dlp output template) is
# resolved against the directory the script was started from.
main_dir = Path().resolve()
print(main_dir)
os.chdir(main_dir)

models_dir = main_dir / "rvc_models"
# NOTE(review): `audio_separat_dir` points at the same folder as `AUDIO_DIR`
# and is not referenced anywhere else in this file; kept in case other code
# relies on the name.
audio_separat_dir = main_dir / "audio_input"
AUDIO_DIR = main_dir / 'audio_input'

# File extensions (lower-case) that are offered in the audio dropdown.
_AUDIO_EXTENSIONS = ('.mp3', '.wav', '.flac', '.ogg', '.aac')


def get_folders() -> list:
    """Return the names of all sub-folders of the models directory.

    Returns:
        A sorted list of folder names, or an empty list when the models
        directory does not exist (or is not a directory).
    """
    if not models_dir.is_dir():
        return []
    # Sorted so the dropdown order is stable between refreshes.
    return sorted(entry.name for entry in models_dir.iterdir() if entry.is_dir())


def refresh_folders() -> dict:
    """Return a Gradio update that reloads the model dropdown choices."""
    # `gr.update` is available in both Gradio 3.x and 4.x, whereas the
    # per-component `gr.Dropdown.update` classmethod was removed in 4.x.
    return gr.update(choices=get_folders())


def get_audio_files() -> list:
    """Return the audio file names found in ``AUDIO_DIR``.

    The directory is created when it is missing. A name is included when it
    ends (case-insensitively) with one of the supported audio extensions.

    Returns:
        A sorted list of bare file names (no directory part).
    """
    # `exist_ok=True` avoids the check-then-create race of the original
    # `os.path.exists` / `os.makedirs` pair.
    AUDIO_DIR.mkdir(parents=True, exist_ok=True)
    return sorted(
        name
        for name in os.listdir(AUDIO_DIR)
        if name.lower().endswith(_AUDIO_EXTENSIONS)
    )


def load_audio_files() -> list:
    """Return the full paths (as strings) of the files in ``AUDIO_DIR``."""
    return [str(AUDIO_DIR / name) for name in get_audio_files()]


def refresh_audio_list() -> dict:
    """Return a Gradio update that reloads the audio dropdown choices."""
    return gr.update(choices=load_audio_files())


def download_audio(url: str) -> str:
    """Download the audio track of ``url`` with yt-dlp and convert it to WAV.

    The file is written below the relative ``ytdl/`` folder, named after the
    media title.

    Args:
        url: Address of the media to download.

    Returns:
        Path of the resulting ``.wav`` file.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # `prepare_filename` reports the name with the source extension; the
        # post-processor is configured for WAV, so swap the extension.
        downloaded_name = ydl.prepare_filename(info_dict)
    return os.path.splitext(downloaded_name)[0] + '.wav'


async def text_to_speech_edge(text: str, language_code: str) -> str:
    """Synthesize ``text`` with edge-tts and return the audio file path.

    Args:
        text: The text to speak.
        language_code: A key of ``language_dict`` selecting the voice.

    Returns:
        Path of a temporary file holding the generated audio. The file is
        not deleted automatically.

    Raises:
        gr.Error: If ``text`` is empty or ``language_code`` is not a key of
            ``language_dict``.
    """
    if not text or not text.strip():
        raise gr.Error("Please enter some text to convert.")

    # The original fell back to the literal "default_voice", which is not a
    # real voice name, so an empty selection failed inside edge-tts with an
    # unhelpful message. Fail early with a readable one instead.
    voice = language_dict.get(language_code)
    if voice is None:
        raise gr.Error("Please choose a voice model before converting.")

    communicate = edge_tts.Communicate(text, voice)

    # Reserve a unique path, then close the handle before edge-tts writes to
    # it (an open temporary file cannot be reopened on every platform).
    # NOTE(review): edge-tts appears to emit MP3 data, so the ".wav" suffix
    # is probably misleading -- confirm before changing it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        tmp_path = tmp_file.name

    await communicate.save(tmp_path)
    return tmp_path


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument("--share", action="store_true", dest="share_enabled", default=False)
    parser.add_argument("--listen", action="store_true", default=False)
    parser.add_argument('--listen-host', type=str)
    parser.add_argument('--listen-port', type=int)
    args = parser.parse_args()

    # Gradio Interface
    with gr.Blocks(title="Hex RVC", theme=gr.themes.Base(primary_hue="red", secondary_hue="pink")) as app:
        gr.Markdown("# Hex RVC")
        gr.Markdown("Join [AIHub](https://discord.gg/aihub) to get the RVC model!")

        with gr.Tab("Inference"):
            with gr.Row():
                MODEL_NAME = gr.Dropdown(
                    label="Select a Model",
                    choices=get_folders(),
                    interactive=True
                )
                SOUND_PATH = gr.Dropdown(
                    choices=load_audio_files(),
                    label="Select an audio file",
                    interactive=True
                )
                upload_audio = gr.Audio(label="Upload Audio", type='filepath')

            with gr.Accordion("Hex TTS"):
                input_text = gr.Textbox(lines=5, label="Input Text")
                language = gr.Dropdown(choices=list(language_dict.keys()), label="Choose the Voice Model")
                tts_convert = gr.Button("Convert")
                tts_output = gr.Audio(label="Generated TTS Audio", type='filepath')

                tts_convert.click(
                    fn=text_to_speech_edge,
                    inputs=[input_text, language],
                    outputs=tts_output
                )

            output_audio = gr.Audio(label="Generated Audio", type='filepath')

            with gr.Row():
                refresh_btn = gr.Button("Refresh")
                # NOTE(review): no click handler is attached to `run_button`
                # in this file, so pressing it currently does nothing.
                run_button = gr.Button("Convert")

            # One callback refreshes both dropdowns; the tuple order matches
            # the `outputs` order.
            refresh_btn.click(
                lambda: (refresh_audio_list(), refresh_folders()),
                outputs=[SOUND_PATH, MODEL_NAME]
            )

        with gr.Tab("Download RVC Model"):
            url = gr.Textbox(label="Your Model URL")
            dirname = gr.Textbox(label="Your Model Name")
            download_button = gr.Button("Download Model")
            download_output = gr.Textbox(label="Download Status")

            download_button.click(
                download_online_model,
                inputs=[url, dirname],
                outputs=download_output
            )

        with gr.Tab("UVR5 (Demo)"):
            input_audio = gr.Audio(type="filepath", label="Upload Audio")
            roformer_link = gr.Textbox(label="Audio Link")
            roformer_download_button = gr.Button("Download")
            # NOTE(review): `separate_button` and `separation_output` are not
            # wired to any handler in this file.
            separate_button = gr.Button("Separate Audio")
            separation_output = gr.Textbox(label="Separation Output Path")

            roformer_download_button.click(download_audio, inputs=[roformer_link], outputs=[input_audio])

    app.launch(
        share=args.share_enabled,
        # Without --listen, Gradio's default host is used; with it, bind to
        # the requested host or to all interfaces.
        server_name=(args.listen_host or '0.0.0.0') if args.listen else None,
        server_port=args.listen_port,
    )