# AutoAgent / app.py — Gradio Space source
# Author: Adrian8as · commit 3a95125 (verified) · "Changes in string"
import librosa, joblib, numpy as np, gradio as gr
from scipy.interpolate import interp1d
from pyAudioAnalysis import ShortTermFeatures
from pydub.silence import detect_nonsilent
from pydub import AudioSegment
def smart_resize(arr, target_size):
    """Linearly resample a (1, n) feature row onto *target_size* evenly spaced points.

    Returns a new array of shape (1, target_size).
    """
    n = arr.shape[1]
    source_positions = np.linspace(0, n - 1, n)
    sample_positions = np.linspace(0, n - 1, target_size)
    # fill_value="extrapolate" keeps interp1d permissive, although the sample
    # positions never actually leave the [0, n-1] range.
    resample = interp1d(source_positions, arr.squeeze(), kind='linear',
                        fill_value="extrapolate")
    return resample(sample_positions).reshape(1, target_size)
def remove_silence(wav_file):
    """Trim leading/trailing silence from *wav_file* and write the result to 'audio.wav'."""
    segment = AudioSegment.from_wav(wav_file)
    # Any stretch quieter than -30 dBFS lasting >= 5 ms counts as silence.
    voiced = detect_nonsilent(segment, min_silence_len=5, silence_thresh=-30)
    if voiced:
        # Keep everything from the first voiced millisecond to the last one.
        first_ms, last_ms = voiced[0][0], voiced[-1][1]
        segment = segment[first_ms:last_ms]
    # NOTE(review): output path is hard-coded; transform_data() relies on it.
    segment.export('audio.wav', format="wav")
def transform_data(audio):
    """Silence-trim *audio*, extract pyAudioAnalysis short-term features, resize to 20 values."""
    remove_silence(audio)  # writes the trimmed clip to 'audio.wav'
    signal, rate = librosa.load('audio.wav')
    # 50 ms analysis window with a 25 ms hop, expressed in samples.
    features, _names = ShortTermFeatures.feature_extraction(
        signal, rate, 0.050 * rate, 0.025 * rate)
    return smart_resize(features.reshape(1, -1), 20)
def predict(newdf, loaded_model):
    """Run the classifier on one feature row.

    Returns (predicted class array, probability pair for the first sample).
    """
    label = loaded_model.predict(newdf)
    probabilities = loaded_model.predict_proba(newdf)[0]
    return label, probabilities
def get_label(newpred):
    """Map a numeric prediction to its Spanish label: 0 -> 'No', anything else -> 'Si'."""
    return 'No' if newpred == 0 else 'Si'
def load_model(model_path='models/sgd_90.pkl'):
    """Load the persisted classifier from disk.

    Parameters
    ----------
    model_path : str
        Path to the joblib-pickled model. Defaults to the bundled SGD model,
        so existing no-argument callers keep working unchanged.

    Returns
    -------
    The deserialized scikit-learn estimator.
    """
    # NOTE(review): joblib.load unpickles arbitrary code — only load trusted files.
    return joblib.load(model_path)
def main(audio):
    """Full inference pipeline for one recording.

    Featurizes the audio file, classifies it, and returns the label together
    with a dict of both class probabilities (for the Gradio label widget).
    """
    features = transform_data(audio)
    model = load_model()
    prediction, proba = predict(features, model)
    return get_label(prediction), {'Si probability': proba[1],
                                   'No probability': proba[0]}
# Gradio UI: a microphone recording goes in (as a temp file path), and the
# predicted label plus class probabilities come out. `main` does the inference.
demo = gr.Interface(
    title = "Autoagent | YES or NO Classification - Layer7",
    description = """<h3>This model is useful to classify if the user says 'Si' or 'No'. 🎙️ </h3>
<img src="https://huggingface.co/spaces/Adrian8as/imagen/resolve/main/output.png" width="350" height="350"/> <br>
<b>Record your voice:</b>""",
    allow_flagging = "never",
    fn = main,
    inputs = gr.Audio(
        sources=["microphone"],
        type="filepath",  # main() receives a path, not raw samples
    ),
    outputs = [gr.Textbox(label="Clasification"),"label"]
)

if __name__ == "__main__":
    # show_api=False hides the auto-generated API docs page.
    demo.launch(show_api=False)