|
import argparse |
|
|
|
import hparams |
|
import utils |
|
import multiresunet_model |
|
import preprocess_data |
|
|
|
import tensorflow as tf |
|
import numpy as np |
|
import librosa |
|
import torchaudio |
|
import torch |
|
|
|
if __name__ == '__main__':
    # Command-line entry point.
    #
    # Loads the two saved Keras models for the requested source (one per
    # frequency band, "<source>_lf.h5" and "<source>_hf.h5"), runs each on the
    # transform of the input track produced by preprocess_data, recombines
    # the predicted magnitudes with the phase of the input, inverts both
    # bands back to the time domain, sums them and writes the result as
    # "<Output_path><source>.wav".
    #
    # Positional arguments: Path, Source, Model_path, Output_path.
    parser = argparse.ArgumentParser()

    parser.add_argument('Path', metavar='path', type=str,
                        help='Path to audio track to be separated')
    parser.add_argument('Source', metavar='source', type=str,
                        help='Desired source to separate')
    parser.add_argument('Model_path', metavar='path_to_model', type=str,
                        help='Path to saved models')
    parser.add_argument('Output_path', metavar='output_path', type=str,
                        help='Output path for separated audio')

    args = parser.parse_args()

    path_to_audio = args.Path
    source = args.Source
    path_to_model = args.Model_path
    # NOTE: Model_path and Output_path are used as plain string prefixes, so
    # a directory argument must end with a path separator.
    output_path = args.Output_path + source + '.wav'

    model_lf = tf.keras.models.load_model(path_to_model + source + '_lf.h5')
    model_hf = tf.keras.models.load_model(path_to_model + source + '_hf.h5')

    # `sr` is passed by keyword: librosa >= 0.10 rejects it as a positional
    # argument, and the keyword form works on older releases too.
    y, sr = librosa.load(path_to_audio, sr=hparams.sr, mono=True)

    lf = hparams.lf_params
    hf = hparams.hf_params

    C_lf, dc_lf, nf_lf = preprocess_data.forward_transform(
        y, lf['min_f'], lf['max_f'], lf['bins_per_octave'], lf['gamma'])
    C_hf, dc_hf, nf_hf = preprocess_data.forward_transform(
        y, hf['min_f'], hf['max_f'], hf['bins_per_octave'], hf['gamma'])

    # Zero the two auxiliary outputs of the forward transform in place before
    # they are handed back to the inverse transform (presumably the DC and
    # Nyquist components, judging by the names -- confirm in preprocess_data).
    dc_lf[:] = 0
    dc_hf[:] = 0
    nf_lf[:] = 0
    nf_hf[:] = 0

    # Keep the phase of the input; the models only supply magnitudes below.
    phase_lf = np.angle(C_lf)
    phase_hf = np.angle(C_hf)

    c_lf = preprocess_data.make_chunks(C_lf)
    c_hf = preprocess_data.make_chunks(C_hf)

    c_lf = model_lf.predict(c_lf, batch_size=hparams.inference_batch_size)
    c_hf = model_hf.predict(c_hf, batch_size=hparams.inference_batch_size)

    # Take channel 0 of every predicted chunk, lay the chunks side by side
    # along the second axis, and crop to the length of the phase's last axis
    # (presumably removing padding added by make_chunks -- confirm).
    mag_lf = np.hstack(c_lf[:, :, :, 0])[:, :phase_lf.shape[-1]]
    mag_hf = np.hstack(c_hf[:, :, :, 0])[:, :phase_hf.shape[-1]]

    # Rebuild complex coefficients as magnitude * exp(i * phase). np.exp is
    # used instead of `np.math.e ** (...)`: `np.math` was deprecated in
    # NumPy 1.25 and removed in NumPy 2.0.
    c_lf = mag_lf * np.exp(1j * phase_lf)
    c_hf = mag_hf * np.exp(1j * phase_hf)

    y_lf_hat = preprocess_data.backward_transform(
        c_lf, dc_lf, nf_lf, y.shape[0],
        lf['min_f'], lf['max_f'], lf['bins_per_octave'], lf['gamma'])
    y_hf_hat = preprocess_data.backward_transform(
        c_hf, dc_hf, nf_hf, y.shape[0],
        hf['min_f'], hf['max_f'], hf['bins_per_octave'], hf['gamma'])

    # The separated signal is the sum of the two band reconstructions.
    y_hat = y_lf_hat + y_hf_hat

    print(mag_lf.shape)
    print(mag_hf.shape)

    print(y_lf_hat.shape)
    print(y_hf_hat.shape)

    # A leading axis of length 1 is added so the saved tensor is 2-D.
    torchaudio.save(output_path, torch.from_numpy(np.expand_dims(y_hat, 0)), hparams.sr)
|
|
|
|
|
|
|
|
|
|
|
|