import librosa, joblib, numpy as np, gradio as gr from scipy.interpolate import interp1d from pyAudioAnalysis import ShortTermFeatures from pydub.silence import detect_nonsilent from pydub import AudioSegment def smart_resize(arr, target_size): current_size = arr.shape[1] current_idx = np.linspace(0, current_size - 1, current_size) target_idx = np.linspace(0, current_size - 1, target_size) # Interpolate/extrapolate interp_func = interp1d(current_idx, arr.squeeze(), kind='linear', fill_value="extrapolate") resized_arr = interp_func(target_idx) return resized_arr.reshape(1, target_size) def remove_silence(wav_file): audSeg = AudioSegment.from_wav(wav_file) non_silence_ranges = detect_nonsilent(audSeg, min_silence_len=5, silence_thresh=-30) if not non_silence_ranges: sound = audSeg else: start = non_silence_ranges[0][0] end = non_silence_ranges[-1][1] trimmed_sound = audSeg[start:end] sound = trimmed_sound sound.export('audio.wav', format="wav") def transform_data(audio): remove_silence(audio) x, sr = librosa.load('audio.wav') result, f_names = ShortTermFeatures.feature_extraction(x, sr, 0.050*sr, 0.025*sr) resize_features = smart_resize(result.reshape(1,-1), 20) return resize_features def predict(newdf, loaded_model): prediction = loaded_model.predict(newdf) proba = loaded_model.predict_proba(newdf) return prediction, proba[0] def get_label(newpred): if newpred == 0: return 'No' else: return 'Si' def load_model(): ram_for = joblib.load('models/sgd_90.pkl') return ram_for def main(audio): newdf = transform_data(audio) loaded_model = load_model() newpred, proba = predict(newdf, loaded_model) final = get_label(newpred) return final, {'Si probability': proba[1], 'No probability': proba[0]} demo = gr.Interface( title = "Autoagent | YES or NO Classification - Layer7", description = """