"""Streamlit front end that isolates the vocal track of an uploaded audio file."""

# Standard Library
import os

# Third-Party
import streamlit as st
import librosa

# Local
from src.models.MDX_net.kimvocal import KimVocal
from src.loader import Loader
from src.models.MDX_net.mdx_net import Conv_TDF_net_trimm

# Constants
from src.constants import ONNX_MODEL_PATH

INPUT_FOLDER = "./datasets/input"
OUTPUT_FOLDER = "./datasets/output"


def main() -> None:
    """Render the Vocal Isolator page and process an uploaded audio file.

    Configures the page, shows a file uploader, and, once a file has been
    uploaded, prepares it with ``Loader``, builds an ONNX-backed
    ``Conv_TDF_net_trimm`` model, runs ``KimVocal.demix_vocals`` on it and
    plays the resulting vocals back in the browser.
    """
    # Set page configuration and theming (first Streamlit call on the page).
    st.set_page_config(
        page_title="Sing For Me",
        page_icon="🎵",
    )

    st.title("Vocal Isolator")

    # Upload an audio file in any of the accepted formats.
    uploaded_file = st.file_uploader(
        "Upload an Audio File (WAV, MP3, OGG, FLAC)",
        type=["wav", "mp3", "ogg", "flac"],
        key="file_uploader",
    )

    # Nothing to process until the user has provided a file.
    if uploaded_file is None:
        return

    # Process the uploaded audio
    st.subheader("Audio Processing")
    st.write("Processing the uploaded audio file...")

    # Display a progress bar while processing
    progress_bar = st.progress(0)
    progress_text = st.empty()

    loader = Loader(INPUT_FOLDER, OUTPUT_FOLDER)
    music_tensor, samplerate = loader.prepare_uploaded_file(
        uploaded_file=uploaded_file
    )

    model_raw_python = Conv_TDF_net_trimm(
        model_path=ONNX_MODEL_PATH,
        use_onnx=True,
        target_name="vocals",
        L=11,
        l=3,
        g=48,
        bn=8,
        bias=False,
        dim_f=11,
        dim_t=8,
    )

    kimvocal = KimVocal()
    vocals_tensor = kimvocal.demix_vocals(
        music_tensor=music_tensor,
        sample_rate=samplerate,
        model=model_raw_python,
        streamlit_progressbar=progress_bar,
    )
    vocals_array = vocals_tensor.numpy()

    # Update progress
    progress_bar.progress(100)
    progress_text.text("Audio processing complete!")

    # Display processed audio
    st.subheader("Processed Audio")
    # Streamlit encodes NumPy input as WAV using `sample_rate`, so the
    # declared MIME type must be WAV; "audio/mpeg" mislabelled the payload.
    # NOTE(review): st.audio expects a 1D waveform or a 2D array shaped
    # (num_channels, num_samples) -- confirm demix_vocals returns that layout.
    st.audio(data=vocals_array, format="audio/wav", sample_rate=samplerate)


if __name__ == "__main__":
    main()