Devops-hestabit
committed on
Commit
•
a9c0da2
1
Parent(s):
0a828f9
Upload 8 files
Browse files
Uploaded model file and handler.py file for inference endpoint
- .gitattributes +2 -0
- README.md +3 -0
- app.py +67 -0
- audio/Test_Emotion.wav +3 -0
- audio/test.wav +3 -0
- handler.py +64 -0
- models/best_model_depression.h5 +3 -0
- models/best_model_emotion.h5 +3 -0
- requirements.txt +5 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
audio/Test_Emotion.wav filter=lfs diff=lfs merge=lfs -text
|
37 |
+
audio/test.wav filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: other
|
3 |
+
---
|
app.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import librosa
|
2 |
+
import numpy as np
|
3 |
+
import tensorflow as tf
|
4 |
+
from tensorflow.keras.models import load_model
|
5 |
+
|
6 |
+
# Class names for the emotion classifier; predictions are mapped to these
# labels by index.
emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']

# Both Keras models are loaded once at import time from paths relative to
# the current working directory.
emotion_model = load_model('models/best_model_emotion.h5')
depression_model = load_model('models/best_model_depression.h5')
|
10 |
+
def extract_features(audio_path):
    """Load an audio clip and build the MFCC inputs for both models.

    The clip is read with ``librosa.load`` (0.5 s offset, 2.5 s duration,
    44.1 kHz) and converted to 30 MFCC coefficients per frame. The resulting
    matrix is then fitted to a width of 216 frames for the emotion model and
    2584 frames for the depression model.

    Args:
        audio_path: Anything ``librosa.load`` accepts as its first argument.

    Returns:
        A tuple ``(emo_features, dep_features)``; each is the fitted MFCC
        matrix with a leading batch axis of size 1 added.
    """
    X, sample_rate = librosa.load(audio_path, duration=2.5, sr=22050 * 2, offset=0.5)
    features = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=30)

    def fit_width(matrix, width):
        # Zero-pad on the right when too short; keep the trailing `width`
        # frames when too long. The trim must be a slice (`missing:`), not a
        # plain index, otherwise a single column is returned and the frame
        # axis is lost.
        missing = width - matrix.shape[1]
        if missing > 0:
            return np.pad(matrix, [(0, 0), (0, missing)], mode='constant')
        if missing < 0:
            return matrix[:, missing:]
        return matrix

    emo_features = np.expand_dims(fit_width(features, 216), axis=0)
    dep_features = np.expand_dims(fit_width(features, 2584), axis=0)

    return emo_features, dep_features
|
34 |
+
|
35 |
+
def predict_emotion_and_depression(audio):
    """Run both models on one audio input and return human-readable labels.

    Args:
        audio: Passed straight to ``extract_features``.

    Returns:
        A tuple ``(emotion, depression)`` where ``emotion`` is one of
        ``emotion_labels`` and ``depression`` is ``"Depressed"`` or
        ``"Not Depressed"``.
    """
    # No debug printing of the input here: len() is undefined for file
    # objects / Path inputs, and echoing raw audio data floods the logs.
    emo_features, dep_features = extract_features(audio)

    # Predict emotion: pick the label with the highest score.
    emotion_pred = emotion_model.predict(emo_features)[0]
    emotion = emotion_labels[int(np.argmax(emotion_pred))]

    # Predict depression: threshold the model output at 0.5.
    # NOTE(review): assumes the model emits a single score per sample;
    # .item() raises ValueError otherwise, as the original comparison did.
    depression_pred = depression_model.predict(dep_features)[0]
    depression_score = np.asarray(depression_pred).item()
    depression = "Depressed" if depression_score >= 0.5 else "Not Depressed"

    return emotion, depression
|
52 |
+
|
53 |
+
def handler(request):
    """Serve a prediction for a POST request.

    Args:
        request: Object exposing ``method`` and ``data`` attributes.

    Returns:
        A dict with ``"emotion"`` and ``"depression"`` keys for POST
        requests; ``None`` for any other method.
    """
    if request.method != 'POST':
        return None

    # Replace this with the actual way to access the audio data in the request
    payload = request.data

    predicted_emotion, predicted_depression = predict_emotion_and_depression(payload)

    return {
        "emotion": predicted_emotion,
        "depression": predicted_depression
    }
|
audio/Test_Emotion.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:385c36f8d53a617252000ae23a21eada3e788ae4749a665ba728227928a00221
|
3 |
+
size 1098284
|
audio/test.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:385c36f8d53a617252000ae23a21eada3e788ae4749a665ba728227928a00221
|
3 |
+
size 1098284
|
handler.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import numpy as np
|
3 |
+
import pydub
|
4 |
+
import scipy
|
5 |
+
from scipy.io import wavfile
|
6 |
+
from pydub import AudioSegment
|
7 |
+
import base64
|
8 |
+
import librosa
|
9 |
+
import tensorflow as tf
|
10 |
+
|
11 |
+
class EndpointHandler():
|
12 |
+
|
13 |
+
def __init__(self, path):
|
14 |
+
self.emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']
|
15 |
+
self.emotion_model = tf.keras.models.load_model(f"{path}/models/best_model_emotion.h5")
|
16 |
+
self.depression_model = tf.keras.models.load_model(f"{path}/models/best_model_depression.h5")
|
17 |
+
|
18 |
+
def __call__(self, input_data):
|
19 |
+
audio_base64 = input_data.pop("inputs", input_data)
|
20 |
+
audio_features = self.preprocess_audio_data(audio_base64)
|
21 |
+
emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
|
22 |
+
return {
|
23 |
+
"emotion": emotion_prediction,
|
24 |
+
"depression": depression_prediction
|
25 |
+
}
|
26 |
+
|
27 |
+
def get_mfcc_features(self, features, padding):
|
28 |
+
padded_features = padding - features.shape[1]
|
29 |
+
if padded_features > 0:
|
30 |
+
features = np.pad(features, [(0, 0), (0, padded_features)], mode='constant')
|
31 |
+
elif padded_features < 0:
|
32 |
+
features = features[:, padded_features:]
|
33 |
+
return np.expand_dims(features, axis=0)
|
34 |
+
|
35 |
+
def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
|
36 |
+
# audio_base64 = base64_string.replace("data:audio/webm;codecs=opus;base64,", "")
|
37 |
+
audio_bytes = base64.b64decode(base64_string)
|
38 |
+
audio_io = io.BytesIO(audio_bytes)
|
39 |
+
audio = AudioSegment.from_file(audio_io, format="webm")
|
40 |
+
|
41 |
+
byte_io = io.BytesIO()
|
42 |
+
audio.export(byte_io, format="wav")
|
43 |
+
byte_io.seek(0)
|
44 |
+
|
45 |
+
sample_rate, audio_array = wavfile.read(byte_io)
|
46 |
+
|
47 |
+
audio_array = librosa.resample(audio_array.astype(float), orig_sr=sample_rate, target_sr=desired_sr)
|
48 |
+
start_sample = int(offset * desired_sr)
|
49 |
+
end_sample = start_sample + int(duration * desired_sr)
|
50 |
+
audio_array = audio_array[start_sample:end_sample]
|
51 |
+
|
52 |
+
|
53 |
+
# X, sample_rate = librosa.load(audio_io, duration=duration, sr=desired_sr, offset=offset)
|
54 |
+
X = librosa.util.normalize(audio_array)
|
55 |
+
return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
|
56 |
+
|
57 |
+
def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
|
58 |
+
emotion_features = self.get_mfcc_features(features, emotion_padding)
|
59 |
+
depression_features = self.get_mfcc_features(features, depression_padding)
|
60 |
+
emotion_prediction = self.emotion_model.predict(emotion_features)[0]
|
61 |
+
emotion_prediction = self.emotion_labels[np.argmax(emotion_prediction)]
|
62 |
+
depression_prediction = self.depression_model.predict(depression_features)[0]
|
63 |
+
# depression_prediction = "Depressed" if depression_prediction >= 0.5 else "Not Depressed"
|
64 |
+
return emotion_prediction, depression_prediction
|
models/best_model_depression.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:afdf804b5015afbf43d692a629e79c6bf8511ae878101cba13904e3954643d95
|
3 |
+
size 10756184
|
models/best_model_emotion.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7df1b2e83367f87c9c91ec49e256dbf1533421dcbfba61afc697c5b4679c26ea
|
3 |
+
size 10761304
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
keras==2.13.1
|
2 |
+
librosa==0.10.0.post2
|
3 |
+
tensorflow-cpu==2.13.0
|
4 |
+
pydub
|
5 |
+
scipy
|