|
import os |
|
from fastai.vision.all import * |
|
import gradio as gr |
|
import pickle |
|
import tempfile |
|
from transformers import AutoTokenizer, AutoModelWithLMHead |
|
from speechbrain.inference.interfaces import foreign_class |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the exported fastai learner once at import time and keep a handle on
# its class vocabulary, which predict() indexes to obtain the label.
learn_emotion = load_learner('emotions_vgg19.pkl')
learn_emotion_labels = learn_emotion.dls.vocab
|
|
|
|
|
|
|
def predict(img):
    """Return the emotion label the image learner assigns to ``img``.

    ``img`` is converted with ``PILImage.create`` and passed to
    ``learn_emotion.predict``; the class index from that call is looked up in
    ``learn_emotion_labels`` and the resulting entry is returned.
    """
    image = PILImage.create(img)
    # predict() returns three values; only the middle one (the class index)
    # is needed to pick the label out of the vocabulary.
    _, class_idx, _ = learn_emotion.predict(image)
    return learn_emotion_labels[class_idx]
|
|
|
|
|
|
|
# --- Static text and sample files for the image tab -------------------------

title = "Facial Emotion Detector"

# Raw Markdown shown above the image interface.
_DESCRIPTION_MD = """Ever wondered what a person might be feeling looking at their picture?
Well, now you can! Try this fun app. Just upload a facial image in JPG or
PNG format. You can now see what they might have felt when the picture
was taken.

**Tip**: Be sure to only include face to get best results. Check some sample images
below for inspiration!"""

# Raw Markdown shown below the image interface.
_ARTICLE_MD = """**DISCLAIMER:** This model does not reveal the actual emotional state of a person. Use and
interpret results at your own risk!.

**PREMISE:** The idea is to determine an overall emotion of a person
based on the pictures. We are restricting pictures to only include close-up facial
images.

**DATA:** FER2013 dataset consists of 48x48 pixel grayscale images of faces.Images
are assigned one of the 7 emotions: Angry, Disgust, Fear, Happy, Sad, Surprise, and Neutral.

"""

# The text is routed through a gr.Markdown component and read back via
# ``.value``, exactly as the interface definition expects to receive it.
description = gr.Markdown(_DESCRIPTION_MD).value
article = gr.Markdown(_ARTICLE_MD).value

# NOTE(review): not referenced anywhere in the visible code.
enable_queue = True

# Sample images offered as one-click examples in the image tab.
examples = [
    'happy1.jpg',
    'happy2.jpg',
    'angry1.png',
    'angry2.jpg',
    'neutral1.jpg',
    'neutral2.jpg',
]
|
|
|
# Image tab: an image input (requested in mode 'L') feeding predict(), with
# the result shown in a Label component. Despite its name, ``image_mode`` is
# the Interface object itself; the name is kept because the tabbed interface
# refers to it.
image_mode = gr.Interface(
    fn=predict,
    inputs=gr.Image(image_mode='L'),
    outputs=[gr.Label(label='Emotion')],
    title=title,
    examples=examples,
    description=description,
    article=article,
    allow_flagging='never',
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_pickle(path):
    """Unpickle and return the object stored in the file at ``path``."""
    with open(path, "rb") as fh:
        return pickle.load(fh)


# NOTE: pickle.load can execute arbitrary code contained in the file, so
# these artifacts must come from a trusted source.
tokenizer = _load_pickle("emotion_tokenizer.pkl")
model = _load_pickle("emotion_model.pkl")
|
|
|
|
|
|
|
def classify_emotion(text):
    """Bucket the emotion generated for ``text`` into a coarse sentiment.

    The input is prefixed with ``"emotion: "``, encoded with the module-level
    ``tokenizer`` and passed to ``model.generate``. The decoded output is then
    mapped as follows: "joy" or "love" -> "Positive", "surprise" -> "Neutral",
    anything else -> "Negative".

    Args:
        text: Free-form text to classify.

    Returns:
        "Positive", "Neutral" or "Negative"; or a short prompt message when
        ``text`` is ``None``, empty or whitespace only.
    """
    # Guard first: None would fail on the string concatenation below, and an
    # empty prompt gives the model nothing meaningful to classify.
    if not text or not text.strip():
        return "Please enter some text to classify."

    input_ids = tokenizer.encode("emotion: " + text, return_tensors="pt")
    output = model.generate(input_ids)
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)

    # Normalise before comparing so stray whitespace or capitalisation in the
    # decoded text does not silently fall through to "Negative".
    label = decoded.strip().lower()

    if label in ("joy", "love"):
        return "Positive"
    if label == "surprise":
        return "Neutral"
    return "Negative"
|
|
|
|
|
# Text tab: a textbox in, the sentiment bucket from classify_emotion() out.
text_model = gr.Interface(
    fn=classify_emotion,
    inputs="textbox",
    outputs="textbox",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Speech emotion classifier, built once at import time from the given
# SpeechBrain source using the custom interface class named below.
classifier = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)
|
|
|
def save_uploaded_file(uploaded_file):
    """Persist an uploaded file object to a fresh temporary directory.

    Args:
        uploaded_file: Object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes.

    Returns:
        Path of the written copy. The file and its directory are NOT removed
        automatically; the caller is responsible for deleting them.
    """
    # mkdtemp() rather than TemporaryDirectory(): the latter deletes the
    # directory once the object is destroyed, which happens when this function
    # returns, so the returned path would point at a file that no longer exists.
    temp_dir = tempfile.mkdtemp()

    # Keep only the final path component so a client-supplied name containing
    # directories cannot point outside (or into a missing folder of) temp_dir.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(temp_dir, safe_name)

    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
|
|
|
|
|
# Short labels produced by the audio classifier -> names shown to the user.
# Built once at import time instead of on every call.
_EMOTION_DISPLAY_NAMES = {
    'neu': 'Neutral',
    'ang': 'Angry',
    'hap': 'Happy',
    'sad': 'Sadness',
}


def emotion(file_path):
    """Classify the emotion of the audio file at ``file_path``.

    Args:
        file_path: Path to an audio file, as typed into the textbox.

    Returns:
        The display name for the classifier's label ('Neutral', 'Angry',
        'Happy' or 'Sadness'), 'Unknown' for any other label, or a prompt
        message when no path was supplied.
    """
    # Treat None, "" and whitespace-only input alike: there is no file to read.
    if not file_path or not file_path.strip():
        return "Please provide the path to an audio file."

    # Surrounding whitespace from the textbox would otherwise become part of
    # the path handed to the classifier.
    _out_prob, _score, _index, text_lab = classifier.classify_file(file_path.strip())

    # The label may come back wrapped in a list; unwrap the first entry.
    if isinstance(text_lab, list):
        text_lab = text_lab[0]

    return _EMOTION_DISPLAY_NAMES.get(text_lab, 'Unknown')
|
|
|
|
|
|
|
|
|
|
|
# Audio tab: the user types a file path into a textbox and emotion() returns
# the label as text.
audio_model = gr.Interface(
    fn=emotion,
    inputs="textbox",
    outputs="textbox",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Combine the three single-purpose interfaces into one app; the second list
# gives the tab titles in the same order as the interfaces.
main_model = gr.TabbedInterface(
    [text_model, image_mode, audio_model],
    [
        "Text Emotion Recognition",
        "Image Emotion Recognition",
        "Audio Emotion Recognition",
    ],
)

# The entry-point guard needs the double-underscore names: ``_name_`` is an
# undefined variable and raised NameError before the app could launch.
if __name__ == "__main__":
    main_model.launch()