Adrian8as committed on
Commit
b00f2b1
1 Parent(s): b79531d

Adding initial files

Browse files
Files changed (3) hide show
  1. app.py +82 -0
  2. models/sgd_90.pkl +3 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa, joblib, numpy as np, gradio as gr
2
+ from scipy.interpolate import interp1d
3
+ from pyAudioAnalysis import ShortTermFeatures
4
+ from pydub.silence import detect_nonsilent
5
+ from pydub import AudioSegment
6
+
7
+
8
def smart_resize(arr, target_size):
    """Resample a single row of values to ``target_size`` columns.

    The values are treated as samples on an evenly spaced grid and are
    linearly interpolated onto a new grid that shares the same end points,
    so the first and last values are preserved when ``target_size >= 2``.

    Args:
        arr: Array-like holding one row of values, e.g. shape ``(1, n)``.
        target_size: Number of columns wanted in the result.

    Returns:
        A float ``numpy.ndarray`` of shape ``(1, target_size)``.

    Raises:
        ValueError: If ``arr`` contains no values.
    """
    values = np.asarray(arr, dtype=float).reshape(-1)
    current_size = values.size
    if current_size == 0:
        raise ValueError("smart_resize() needs at least one value to resample")

    current_idx = np.arange(current_size)
    target_idx = np.linspace(0, current_size - 1, target_size)

    # The target grid never leaves [0, current_size - 1], so no extrapolation
    # is required. np.interp also copes with a single input value (it is
    # simply repeated), which a two-point-minimum interpolator cannot do.
    resized_arr = np.interp(target_idx, current_idx, values)

    return resized_arr.reshape(1, target_size)
19
+
20
def remove_silence(wav_file, output_path='audio.wav', min_silence_len=5, silence_thresh=-30):
    """Trim leading and trailing silence from a WAV file and save the result.

    Everything between the start of the first non-silent range and the end
    of the last one is kept (pauses in the middle are preserved). When no
    non-silent range is detected, the recording is written out unchanged.

    Args:
        wav_file: Path (or file object) of the WAV recording to trim.
        output_path: Where the trimmed WAV is written. Defaults to
            ``'audio.wav'`` in the current working directory.
        min_silence_len: Forwarded to ``pydub.silence.detect_nonsilent``
            (minimum silence length, in milliseconds per the pydub docs).
        silence_thresh: Forwarded to ``pydub.silence.detect_nonsilent``
            (silence threshold, in dBFS per the pydub docs).

    Returns:
        ``output_path``, the location the trimmed audio was written to.
    """
    audio_segment = AudioSegment.from_wav(wav_file)
    non_silence_ranges = detect_nonsilent(
        audio_segment,
        min_silence_len=min_silence_len,
        silence_thresh=silence_thresh,
    )

    if non_silence_ranges:
        start = non_silence_ranges[0][0]
        end = non_silence_ranges[-1][1]
        sound = audio_segment[start:end]
    else:
        sound = audio_segment

    # export() hands back the open output file object; close it explicitly
    # so the data is flushed and the handle is not left to the garbage
    # collector.
    sound.export(output_path, format="wav").close()

    return output_path
33
+
34
def transform_data(audio):
    """Convert a recorded WAV file into the feature row fed to the model.

    Steps: trim silence, load the trimmed audio, extract short-term
    features, flatten them into one row and resample that row to 20 values.

    Args:
        audio: Path of the recorded WAV file.

    Returns:
        The array produced by ``smart_resize`` for a target size of 20.
    """
    # remove_silence() leaves the trimmed recording in 'audio.wav'.
    remove_silence(audio)
    samples, sample_rate = librosa.load('audio.wav')

    # Window and step are derived from the sample rate (0.050 and 0.025
    # of one second's worth of samples, respectively).
    window = 0.050*sample_rate
    step = 0.025*sample_rate
    features, _feature_names = ShortTermFeatures.feature_extraction(
        samples, sample_rate, window, step
    )

    flattened = features.reshape(1, -1)
    return smart_resize(flattened, 20)
43
+
44
def predict(newdf, loaded_model):
    """Run ``loaded_model.predict`` on ``newdf`` and return its result.

    Args:
        newdf: Feature data passed straight through to the model.
        loaded_model: Any object exposing a ``predict(features)`` method.

    Returns:
        Whatever ``loaded_model.predict(newdf)`` returns.
    """
    return loaded_model.predict(newdf)
49
+
50
def get_label(newpred):
    """Translate a prediction into its text label.

    Args:
        newpred: The predicted value (compared against ``0``).

    Returns:
        ``'No'`` when ``newpred`` equals 0, ``'Si'`` for anything else.
    """
    return 'No' if newpred == 0 else 'Si'
55
+
56
# Location of the serialized classifier shipped with the app.
_MODEL_PATH = 'models/sgd_90.pkl'

# Holds the deserialized model after the first successful load.
_cached_model = None


def load_model():
    """Return the classifier stored at ``models/sgd_90.pkl``.

    The file is read from disk only on the first call; later calls return
    the same in-memory object, so a request no longer pays the
    deserialization cost each time.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    global _cached_model
    if _cached_model is None:
        # NOTE: joblib.load relies on pickle and can execute arbitrary code;
        # only ever point it at model files that ship with this application.
        _cached_model = joblib.load(_MODEL_PATH)

    return _cached_model
60
+
61
def main(audio):
    """Classify a recorded utterance and return its label.

    Args:
        audio: Path of the recorded WAV file supplied by the Gradio audio
            input. It is empty/``None`` when nothing was recorded.

    Returns:
        The label string produced by ``get_label`` (``'Si'`` or ``'No'``).

    Raises:
        gr.Error: If no recording was provided, so the interface shows a
            readable message instead of an opaque file-loading failure.
    """
    if not audio:
        raise gr.Error("No audio received. Please record your voice and try again.")

    features = transform_data(audio)
    model = load_model()
    prediction = predict(features, model)

    return get_label(prediction)
68
+
69
# Gradio interface: records audio from the microphone, hands the recording's
# file path to main() and shows the returned label in a text box.
demo = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(label="Clasification"),
    title="Autoagent | YES or NO Classification - Layer7",
    description="<h3>This model is useful to classify if the user says 'Si' or 'No'. 🎙️ </h3> <br> <b>Record your voice:</b>",
    allow_flagging="never",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch(show_api=False)
models/sgd_90.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03f33949524bce752dae123a0fcbaac91be1e390bcaa338f141835463c795a78
3
+ size 1248
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ joblib
3
+ numpy
4
+ librosa
5
+ scipy
6
+ pyAudioAnalysis
7
+ pydub