Upload 8 files

Browse files

Uploaded the model files and handler.py for the inference endpoint

Files changed (9) hide show

.gitattributes +2 -0
README.md +3 -0
app.py +67 -0
audio/Test_Emotion.wav +3 -0
audio/test.wav +3 -0
handler.py +64 -0
models/best_model_depression.h5 +3 -0
models/best_model_emotion.h5 +3 -0
requirements.txt +5 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+audio/Test_Emotion.wav filter=lfs diff=lfs merge=lfs -text
+audio/test.wav filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,3 @@

+---
+license: other
+---

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import librosa
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.models import load_model
+# Class names looked up with the argmax index of the emotion model's output.
+emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']
+# Both Keras models are loaded once, at import time, from paths that are
+# relative to the current working directory.
+emotion_model = load_model('models/best_model_emotion.h5')
+depression_model = load_model('models/best_model_depression.h5')
+def _fit_frame_count(features, target_frames):
+    """Zero-pad or truncate ``features`` along axis 1 to ``target_frames`` columns."""
+    shortfall = target_frames - features.shape[1]
+    if shortfall > 0:
+        # Too few frames: append zero columns on the right.
+        return np.pad(features, [(0, 0), (0, shortfall)], mode='constant')
+    # Too many frames (or an exact fit): keep the leading columns. A plain
+    # negative index here would select a single column and drop a dimension.
+    return features[:, :target_frames]
+def extract_features(audio_path):
+    """Compute fixed-width MFCC inputs for the emotion and depression models.
+
+    Loads 2.5 seconds of audio starting 0.5 seconds in, resampled to
+    44100 Hz, and extracts 30 MFCCs per frame. The same MFCC matrix is then
+    padded/truncated to 216 frames for the emotion model and to 2584 frames
+    for the depression model.
+
+    Args:
+        audio_path: Audio source passed straight to ``librosa.load``.
+
+    Returns:
+        A tuple ``(emo_features, dep_features)`` of arrays with a leading
+        batch axis: shapes ``(1, 30, 216)`` and ``(1, 30, 2584)``.
+    """
+    X, sample_rate = librosa.load(audio_path, duration=2.5, sr=22050*2, offset=0.5)
+    features = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=30)
+    emo_features = np.expand_dims(_fit_frame_count(features, 216), axis=0)
+    dep_features = np.expand_dims(_fit_frame_count(features, 2584), axis=0)
+    return emo_features, dep_features
+def predict_emotion_and_depression(audio):
+    """Classify one audio input with both models.
+
+    Args:
+        audio: Audio source forwarded to ``extract_features``; it must also
+            support ``len()`` because its length is printed.
+
+    Returns:
+        A tuple ``(emotion, depression)``: the label from ``emotion_labels``
+        with the highest score, and ``"Depressed"`` or ``"Not Depressed"``
+        depending on whether the depression output is at least 0.5.
+    """
+    # Debug output: echo the input and its length.
+    print(audio)
+    print(len(audio))
+    emotion_input, depression_input = extract_features(audio)
+    # Emotion: first row of the prediction, highest-scoring class wins.
+    emotion_scores = emotion_model.predict(emotion_input)[0]
+    print(emotion_scores)
+    emotion = emotion_labels[np.argmax(emotion_scores)]
+    # Depression: threshold the first row of the prediction at 0.5.
+    depression_score = depression_model.predict(depression_input)[0]
+    if depression_score >= 0.5:
+        depression = "Depressed"
+    else:
+        depression = "Not Depressed"
+    return emotion, depression
+def handler(request):
+    """Answer a POST request with emotion and depression predictions.
+
+    Args:
+        request: Object exposing ``method`` and ``data`` attributes.
+
+    Returns:
+        A dict with ``"emotion"`` and ``"depression"`` keys for POST
+        requests; ``None`` for every other method.
+    """
+    if request.method != 'POST':
+        # Non-POST requests produce no response.
+        return None
+    # Placeholder access: replace this with the actual way to read the audio
+    # data from the request.
+    audio = request.data
+    emotion, depression = predict_emotion_and_depression(audio)
+    return {"emotion": emotion, "depression": depression}

audio/Test_Emotion.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:385c36f8d53a617252000ae23a21eada3e788ae4749a665ba728227928a00221
+size 1098284

audio/test.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:385c36f8d53a617252000ae23a21eada3e788ae4749a665ba728227928a00221
+size 1098284

handler.py ADDED Viewed

	@@ -0,0 +1,64 @@

+import io
+import numpy as np
+import pydub
+import scipy
+from scipy.io import wavfile
+from pydub import AudioSegment
+import base64
+import librosa
+import tensorflow as tf
+class EndpointHandler():
+    """Inference-endpoint handler that runs an emotion and a depression model on a speech clip.
+
+    Instances are callable: ``handler({"inputs": <base64 audio>})`` returns a
+    dict with ``"emotion"`` and ``"depression"`` entries.
+    """
+    def __init__(self, path):
+        """Load both Keras models from ``<path>/models``.
+
+        Args:
+            path: Directory that contains the ``models`` folder.
+        """
+        self.emotion_labels = ['Angry', 'Calm', 'Fearful', 'Happy', 'Sad']
+        self.emotion_model = tf.keras.models.load_model(f"{path}/models/best_model_emotion.h5")
+        self.depression_model = tf.keras.models.load_model(f"{path}/models/best_model_depression.h5")
+    def __call__(self, input_data):
+        """Run both models on one request payload.
+
+        Args:
+            input_data: Mapping whose ``"inputs"`` entry holds the
+                base64-encoded audio. The key is popped from the mapping; if
+                it is missing, the mapping itself is used as the payload.
+
+        Returns:
+            A dict with ``"emotion"`` (a label from ``self.emotion_labels``)
+            and ``"depression"`` (the first row of the depression model's
+            prediction, not thresholded).
+        """
+        audio_base64 = input_data.pop("inputs", input_data)
+        audio_features = self.preprocess_audio_data(audio_base64)
+        emotion_prediction, depression_prediction = self.perform_emotion_analysis(audio_features)
+        return {
+            "emotion": emotion_prediction,
+            "depression": depression_prediction
+        }
+    def get_mfcc_features(self, features, padding):
+        """Force ``features`` to exactly ``padding`` columns and add a batch axis.
+
+        Args:
+            features: 2-D array whose second axis is resized.
+            padding: Target number of columns.
+
+        Returns:
+            Array of shape ``(1, features.shape[0], padding)``.
+        """
+        missing = padding - features.shape[1]
+        if missing > 0:
+            # Too few columns: append zeros on the right.
+            features = np.pad(features, [(0, 0), (0, missing)], mode='constant')
+        elif missing < 0:
+            # Too many columns: keep the leading `padding` columns, matching
+            # the right-side padding above. Slicing with `missing:` would
+            # keep only the trailing surplus and return the wrong width.
+            features = features[:, :padding]
+        return np.expand_dims(features, axis=0)
+    def preprocess_audio_data(self, base64_string, duration=2.5, desired_sr=22050*2, offset=0.5):
+        """Decode base64 WebM audio and compute 30 MFCCs over a fixed window.
+
+        Args:
+            base64_string: Base64-encoded WebM audio, optionally preceded by
+                a data-URL header such as
+                ``data:audio/webm;codecs=opus;base64,``.
+            duration: Length of the analysed window in seconds.
+            desired_sr: Sample rate the audio is resampled to.
+            offset: Seconds skipped at the start of the clip.
+
+        Returns:
+            MFCC matrix with 30 rows, as returned by ``librosa.feature.mfcc``.
+
+        Raises:
+            ValueError: If no samples remain after skipping ``offset`` seconds.
+        """
+        # b64decode silently discards characters outside the base64 alphabet,
+        # so a data-URL header would otherwise be folded into the payload and
+        # corrupt the decoded audio.
+        if isinstance(base64_string, str) and base64_string.startswith("data:"):
+            base64_string = base64_string.split(",", 1)[-1]
+        audio_bytes = base64.b64decode(base64_string)
+        audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="webm")
+        # Round-trip through an in-memory WAV so scipy can read the samples.
+        byte_io = io.BytesIO()
+        audio.export(byte_io, format="wav")
+        byte_io.seek(0)
+        sample_rate, audio_array = wavfile.read(byte_io)
+        audio_array = audio_array.astype(float)
+        if audio_array.ndim > 1:
+            # Multi-channel WAV data is (samples, channels); average to mono
+            # so resampling and slicing below run along the time axis.
+            audio_array = audio_array.mean(axis=1)
+        audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=desired_sr)
+        start_sample = int(offset * desired_sr)
+        end_sample = start_sample + int(duration * desired_sr)
+        audio_array = audio_array[start_sample:end_sample]
+        if audio_array.size == 0:
+            raise ValueError(
+                f"audio clip is too short: no samples remain after the {offset} s offset"
+            )
+        X = librosa.util.normalize(audio_array)
+        return librosa.feature.mfcc(y=X, sr=desired_sr, n_mfcc=30)
+    def perform_emotion_analysis(self, features, emotion_padding=216, depression_padding=2584):
+        """Predict the emotion label and the depression output for one MFCC matrix.
+
+        Args:
+            features: 2-D MFCC matrix from ``preprocess_audio_data``.
+            emotion_padding: Column count fed to the emotion model.
+            depression_padding: Column count fed to the depression model.
+
+        Returns:
+            A tuple ``(emotion_label, depression_prediction)``; the second
+            item is the first row of the depression model's prediction.
+        """
+        emotion_features = self.get_mfcc_features(features, emotion_padding)
+        depression_features = self.get_mfcc_features(features, depression_padding)
+        emotion_prediction = self.emotion_model.predict(emotion_features)[0]
+        emotion_prediction = self.emotion_labels[np.argmax(emotion_prediction)]
+        # The depression output is returned as-is; no threshold is applied here.
+        depression_prediction = self.depression_model.predict(depression_features)[0]
+        return emotion_prediction, depression_prediction

models/best_model_depression.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afdf804b5015afbf43d692a629e79c6bf8511ae878101cba13904e3954643d95
+size 10756184

models/best_model_emotion.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7df1b2e83367f87c9c91ec49e256dbf1533421dcbfba61afc697c5b4679c26ea
+size 10761304

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+keras==2.13.1
+librosa==0.10.0.post2
+tensorflow-cpu==2.13.0
+pydub
+scipy