import os

import gradio as gr
from pydub import AudioSegment
from audio_separator.separator import Separator

from lib.infer import infer_audio


def _separate_and_rename(separator, model_filename, source_path, output_dir,
                         first_target, second_target):
    """Run one separation pass and move its two outputs to fixed file names.

    Loads ``model_filename`` into ``separator``, separates ``source_path``
    and moves the first and second reported output files to
    ``first_target`` and ``second_target`` respectively.

    Assumes ``separator.separate`` returns at least two file names that are
    relative to ``output_dir`` -- this mirrors how the results were consumed
    before this helper existed; verify against the audio_separator version
    in use.
    """
    separator.load_model(model_filename=model_filename)
    stems = separator.separate(source_path)
    # os.replace overwrites an existing target on every platform, so running
    # the pipeline twice into the same directory does not fail on Windows.
    os.replace(os.path.join(output_dir, stems[0]), first_target)
    os.replace(os.path.join(output_dir, stems[1]), second_target)


def separate_audio(input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc):
    """Split a track into six stems using three separation models in sequence.

    Stages:
      1. ``model_voc_inst``: input track -> Instrumental + Vocals
      2. ``model_deecho``:   Vocals -> Vocals (No Reverb) + Vocals (Reverb)
      3. ``model_back_voc``: Vocals (No Reverb) -> Backing Vocals + Lead Vocals

    Args:
        input_audio: Path of the audio file to separate.
        output_dir: Directory that receives all output files; created if
            it does not exist.
        model_voc_inst: Model file name for the vocal/instrumental split.
        model_deecho: Model file name for the de-echo/de-reverb pass.
        model_back_voc: Model file name for the lead/backing vocal split.

    Returns:
        Tuple of file paths: (instrumental, vocals, vocals_reverb,
        vocals_no_reverb, lead_vocals, backing_vocals).

    Raises:
        gr.Error: If no input audio was supplied.
    """
    if not input_audio:
        raise gr.Error("Please upload an audio file.")

    os.makedirs(output_dir, exist_ok=True)
    separator = Separator(output_dir=output_dir)

    # Fixed output names so the UI can always find each stem.
    vocals = os.path.join(output_dir, 'Vocals.wav')
    instrumental = os.path.join(output_dir, 'Instrumental.wav')
    vocals_reverb = os.path.join(output_dir, 'Vocals (Reverb).wav')
    vocals_no_reverb = os.path.join(output_dir, 'Vocals (No Reverb).wav')
    lead_vocals = os.path.join(output_dir, 'Lead Vocals.wav')
    backing_vocals = os.path.join(output_dir, 'Backing Vocals.wav')

    # Stage 1: full track -> instrumental (first output) + vocals (second).
    _separate_and_rename(separator, model_voc_inst, input_audio, output_dir,
                         instrumental, vocals)
    # Stage 2: vocals -> dry vocals (first output) + reverb residue (second).
    _separate_and_rename(separator, model_deecho, vocals, output_dir,
                         vocals_no_reverb, vocals_reverb)
    # Stage 3: dry vocals -> backing vocals (first output) + lead (second).
    _separate_and_rename(separator, model_back_voc, vocals_no_reverb, output_dir,
                         backing_vocals, lead_vocals)

    return instrumental, vocals, vocals_reverb, vocals_no_reverb, lead_vocals, backing_vocals


def _uploaded_file_path(upload_audio):
    """Return the on-disk path of an uploaded file, or None if there is none.

    Accepts either a plain path (str / os.PathLike) or an object exposing
    the path through a ``name`` attribute, so the caller does not depend on
    which of the two forms the upload widget delivers.
    """
    if upload_audio is None:
        return None
    if isinstance(upload_audio, (str, os.PathLike)):
        return os.fspath(upload_audio)
    return getattr(upload_audio, "name", None)


def _ensure_executable(path):
    """Best-effort: add the execute permission bits to ``path``."""
    try:
        os.chmod(path, os.stat(path).st_mode | 0o111)
    except OSError:
        # Deliberately ignored: the previous shell `chmod +x` call also
        # carried on when the file was missing or not owned by this user.
        pass


def process_audio(MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH,
                  CREPE_HOP_LENGTH, INDEX_RATE, FILTER_RADIUS, RMS_MIX_RATE, PROTECT,
                  SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP, KEEP_SILENCE,
                  FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT,
                  upload_audio=None):
    """Run voice-conversion inference and return whatever ``infer_audio`` returns.

    ``SOUND_PATH`` takes precedence; when it is empty the uploaded file is
    used instead. The upload is referenced by its existing path rather than
    copied, which avoids truncating the uploaded file while reading it.
    All remaining arguments are forwarded to ``infer_audio`` unchanged.

    Args:
        MODEL_NAME: Name of the voice model; required.
        SOUND_PATH: Path of the audio to convert; may be empty when
            ``upload_audio`` is given.
        upload_audio: Uploaded file (path or object with a ``name``
            attribute), used only when ``SOUND_PATH`` is empty.

    Returns:
        The result of ``infer_audio`` (shown in the "Generated Audio" output).

    Raises:
        gr.Error: If the model name is missing, or if neither an audio path
            nor an uploaded file is available.
    """
    # Validate first so nothing is touched on disk for an invalid request.
    # Raising gr.Error shows the message in the UI; returning the text would
    # hand a non-path string to the audio output component.
    if not MODEL_NAME:
        raise gr.Error("Please provide a model name.")

    if not SOUND_PATH:
        SOUND_PATH = _uploaded_file_path(upload_audio)
    if not SOUND_PATH:
        raise gr.Error("Please provide an audio path or upload an audio file.")

    # Make sure the bundled pitch-shift binary can be executed.
    _ensure_executable("stftpitchshift")

    inferred_audio = infer_audio(
        MODEL_NAME,
        SOUND_PATH,
        F0_CHANGE,
        F0_METHOD,
        MIN_PITCH,
        MAX_PITCH,
        CREPE_HOP_LENGTH,
        INDEX_RATE,
        FILTER_RADIUS,
        RMS_MIX_RATE,
        PROTECT,
        SPLIT_INFER,
        MIN_SILENCE,
        SILENCE_THRESHOLD,
        SEEK_STEP,
        KEEP_SILENCE,
        FORMANT_SHIFT,
        QUEFRENCY,
        TIMBRE,
        F0_AUTOTUNE,
        OUTPUT_FORMAT,
    )
    return inferred_audio


# Gradio Blocks interface with two tabs: stem separation and inference.
# NOTE(review): the row grouping below was reconstructed from a source whose
# whitespace had been collapsed -- confirm it matches the intended layout.
# NOTE(review): `gr.Audio(source=...)` and `gr.File(type='filepath')` look
# like they target different Gradio major versions -- confirm against the
# installed Gradio release.
with gr.Blocks(title="Hex RVC") as app:
    gr.Markdown("# Hex RVC")

    with gr.Tab("Audio Separation"):
        with gr.Row():
            input_audio = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
            output_dir = gr.Textbox(value="/content/output", label="Output Directory")

        with gr.Row():
            model_voc_inst = gr.Textbox(value='model_bs_roformer_ep_317_sdr_12.9755.ckpt', label="Vocal & Instrumental Model")
            model_deecho = gr.Textbox(value='UVR-DeEcho-DeReverb.pth', label="DeEcho-DeReverb Model")
            model_back_voc = gr.Textbox(value='mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt', label="Backing Vocals Model")

        separate_button = gr.Button("Separate Audio")

        with gr.Row():
            instrumental_out = gr.Audio(label="Instrumental")
            vocals_out = gr.Audio(label="Vocals")
            vocals_reverb_out = gr.Audio(label="Vocals (Reverb)")
            vocals_no_reverb_out = gr.Audio(label="Vocals (No Reverb)")
            lead_vocals_out = gr.Audio(label="Lead Vocals")
            backing_vocals_out = gr.Audio(label="Backing Vocals")

        # Output order must match the tuple returned by separate_audio.
        separate_button.click(
            separate_audio,
            inputs=[input_audio, output_dir, model_voc_inst, model_deecho, model_back_voc],
            outputs=[instrumental_out, vocals_out, vocals_reverb_out, vocals_no_reverb_out, lead_vocals_out, backing_vocals_out]
        )

    with gr.Tab("Inference"):
        with gr.Row():
            MODEL_NAME = gr.Textbox(label="Model Name", placeholder="Enter model name")
            SOUND_PATH = gr.Textbox(label="Audio Path (Optional)", placeholder="Leave blank to upload audio")
            upload_audio = gr.File(label="Upload Audio", type='filepath', file_types=["audio"])

        with gr.Row():
            F0_CHANGE = gr.Number(label="Pitch Change (semitones)", value=0)
            F0_METHOD = gr.Dropdown(choices=["crepe", "harvest", "mangio-crepe", "rmvpe", "rmvpe+", "fcpe", "hybrid[mangio-crepe+rmvpe]", "hybrid[mangio-crepe+fcpe]", "hybrid[rmvpe+fcpe]", "hybrid[mangio-crepe+rmvpe+fcpe]"], label="F0 Method", value="fcpe")

        with gr.Row():
            MIN_PITCH = gr.Textbox(label="Min Pitch", value="50")
            MAX_PITCH = gr.Textbox(label="Max Pitch", value="1100")
            CREPE_HOP_LENGTH = gr.Number(label="Crepe Hop Length", value=120)
            INDEX_RATE = gr.Slider(label="Index Rate", minimum=0, maximum=1, value=0.75)
            FILTER_RADIUS = gr.Number(label="Filter Radius", value=3)
            RMS_MIX_RATE = gr.Slider(label="RMS Mix Rate", minimum=0, maximum=1, value=0.25)
            PROTECT = gr.Slider(label="Protect", minimum=0, maximum=1, value=0.33)

        with gr.Accordion("Advanced Settings", open=False):
            SPLIT_INFER = gr.Checkbox(label="Enable Split Inference", value=False)
            MIN_SILENCE = gr.Number(label="Min Silence (ms)", value=500)
            SILENCE_THRESHOLD = gr.Number(label="Silence Threshold (dBFS)", value=-50)
            SEEK_STEP = gr.Slider(label="Seek Step (ms)", minimum=1, maximum=10, value=1)
            KEEP_SILENCE = gr.Number(label="Keep Silence (ms)", value=200)
            FORMANT_SHIFT = gr.Checkbox(label="Enable Formant Shift", value=False)
            QUEFRENCY = gr.Number(label="Quefrency", value=0)
            TIMBRE = gr.Number(label="Timbre", value=1)
            F0_AUTOTUNE = gr.Checkbox(label="Enable F0 Autotune", value=False)
            OUTPUT_FORMAT = gr.Dropdown(choices=["wav", "flac", "mp3"], label="Output Format", value="wav")

        run_button = gr.Button("Run Inference")
        output_audio = gr.Audio(label="Generated Audio", type='filepath')

        # Input order must match the positional parameters of process_audio.
        run_button.click(
            process_audio,
            inputs=[MODEL_NAME, SOUND_PATH, F0_CHANGE, F0_METHOD, MIN_PITCH, MAX_PITCH, CREPE_HOP_LENGTH, INDEX_RATE,
                    FILTER_RADIUS, RMS_MIX_RATE, PROTECT, SPLIT_INFER, MIN_SILENCE, SILENCE_THRESHOLD, SEEK_STEP,
                    KEEP_SILENCE, FORMANT_SHIFT, QUEFRENCY, TIMBRE, F0_AUTOTUNE, OUTPUT_FORMAT, upload_audio],
            outputs=output_audio
        )

# Launch the Gradio app
app.launch()