# File size: 4,514 Bytes (commit 9b03b75)
import os
from fastai.vision.all import *
import gradio as gr
import pickle
import tempfile
from transformers import AutoTokenizer, AutoModelWithLMHead
from speechbrain.inference.interfaces import foreign_class
# Facial expression classifier.
# The exported fastai learner is loaded once, at import time, from a pickle
# file expected in the working directory (presumably a VGG19-based model,
# judging by the file name -- confirm).
learn_emotion = load_learner('emotions_vgg19.pkl')
# Class vocabulary of the learner's DataLoaders; `predict` indexes into it
# to turn a predicted class index into a label.
learn_emotion_labels = learn_emotion.dls.vocab
# Predict
def predict(img):
    """Return the emotion label predicted for a face image.

    ``img`` is converted with ``PILImage.create`` and classified by the
    module-level ``learn_emotion`` learner. The returned label is read from
    ``learn_emotion_labels`` at the predicted class index.
    """
    image = PILImage.create(img)
    # Only the class index is used; the decoded prediction and the
    # probabilities returned alongside it are ignored.
    _, label_idx, _ = learn_emotion.predict(image)
    return learn_emotion_labels[label_idx]
# Gradio: static texts for the image tab.
# `title`, `description` and `article` are passed to the image
# `gr.Interface` as its heading, intro text and footer text.
title = "Facial Emotion Detector"
# NOTE(review): a `gr.Markdown` component is built only to read its `.value`
# back; presumably this yields the (possibly normalised) markdown string --
# confirm against the installed Gradio version before simplifying.
description = gr.Markdown(
"""Ever wondered what a person might be feeling looking at their picture?
Well, now you can! Try this fun app. Just upload a facial image in JPG or
PNG format. You can now see what they might have felt when the picture
was taken.
**Tip**: Be sure to only include face to get best results. Check some sample images
below for inspiration!""").value
# Footer text: disclaimer, premise and a short note on the training data.
article = gr.Markdown(
"""**DISCLAIMER:** This model does not reveal the actual emotional state of a person. Use and
interpret results at your own risk!.
**PREMISE:** The idea is to determine an overall emotion of a person
based on the pictures. We are restricting pictures to only include close-up facial
images.
**DATA:** FER2013 dataset consists of 48x48 pixel grayscale images of faces.Images
are assigned one of the 7 emotions: Angry, Disgust, Fear, Happy, Sad, Surprise, and Neutral.
""").value
# NOTE(review): `enable_queue` is assigned but never referenced in the
# visible code.
enable_queue=True
# Sample images offered below the input widget. The paths are relative, so
# the files are presumably expected next to this script -- verify.
examples = ['happy1.jpg', 'happy2.jpg', 'angry1.png', 'angry2.jpg', 'neutral1.jpg', 'neutral2.jpg']
# Image tab: the uploaded image is passed to `predict` and the returned
# label is shown in a `gr.Label`. Despite its name, `image_mode` holds the
# Interface object itself; it is later handed to `gr.TabbedInterface`.
image_mode=gr.Interface(fn = predict,
                        # 'L' is the PIL mode for 8-bit grayscale images.
                        inputs = gr.Image( image_mode='L'),
                        outputs = [gr.Label(label='Emotion')],
                        title = title,
                        examples = examples,
                        description = description,
                        article=article,
                        # NOTE(review): newer Gradio releases rename this
                        # option; check the pinned version before upgrading.
                        allow_flagging='never')
# Text model
# Load the tokenizer and the model from pickle files at import time.
# NOTE(review): unpickling can execute arbitrary code, so these files must
# come from a trusted source. The pickles also presumably depend on the
# library versions they were created with -- confirm.
with open("emotion_tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)
with open("emotion_model.pkl", "rb") as f:
    model = pickle.load(f)
def classify_emotion(text):
    """Classify ``text`` as "Positive", "Neutral" or "Negative".

    The text is prefixed with ``"emotion: "``, encoded with the module-level
    ``tokenizer`` and passed to ``model.generate``. The first generated
    sequence is decoded and mapped onto a coarse sentiment.

    Args:
        text: The input sentence to classify.

    Returns:
        ``"Positive"`` when the decoded label is ``"joy"`` or ``"love"``,
        ``"Neutral"`` when it is ``"surprise"``, and ``"Negative"`` for any
        other decoded text.
    """
    input_ids = tokenizer.encode("emotion: " + text, return_tensors="pt")
    output = model.generate(input_ids)
    # Strip surrounding whitespace so stray padding in the decoded text
    # cannot push a known label into the fallback branch.
    label = tokenizer.decode(output[0], skip_special_tokens=True).strip()

    if label in ("joy", "love"):
        return "Positive"
    if label == "surprise":
        return "Neutral"
    # Every remaining label is treated as negative. The original trailing
    # `return output_text` could never run and has been removed.
    return "Negative"
# Text tab: free-text input is sent to `classify_emotion`; the result is
# shown in a text box.
text_model = gr.Interface(fn=classify_emotion, inputs="textbox", outputs="textbox")
# Initialize the speech emotion classifier once, at import time.
# `foreign_class` is given the model source, the Python file that defines
# the interface (`custom_interface.py`) and the class name to instantiate.
classifier = foreign_class(source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP", pymodule_file="custom_interface.py", classname="CustomEncoderWav2vec2Classifier")
def save_uploaded_file(uploaded_file):
    """Write an uploaded file to a fresh temporary directory.

    Args:
        uploaded_file: Object exposing a ``name`` attribute (the file name)
            and a ``getbuffer()`` method returning the file's bytes.

    Returns:
        The path of the written file. The file and its directory stay on
        disk after this function returns, so the caller is responsible for
        removing them once they are no longer needed.
    """
    # `tempfile.TemporaryDirectory()` removes the directory as soon as the
    # object is garbage-collected, i.e. when this function returns, which
    # left callers with a path to a deleted file. `mkdtemp()` creates a
    # directory that persists until it is removed explicitly.
    temp_dir = tempfile.mkdtemp()
    # Keep only the final path component so an upload name containing
    # directory parts cannot place the file outside `temp_dir`.
    file_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join(temp_dir, file_name)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return file_path
def emotion(file_path):
    """Classify the emotion of the audio file at ``file_path``.

    Args:
        file_path: Path of the audio file passed to
            ``classifier.classify_file``. A falsy value (empty string or
            ``None``) is answered with a prompt instead of a prediction.

    Returns:
        ``"Neutral"``, ``"Angry"``, ``"Happy"`` or ``"Sadness"`` for the
        classifier labels ``neu``/``ang``/``hap``/``sad``, ``"Unknown"``
        for any other label, or a prompt message when no path was given.
    """
    if not file_path:
        return "Please provide the path to an audio file."

    # Only the text label is used; probabilities, score and index are
    # ignored.
    _, _, _, text_lab = classifier.classify_file(file_path)
    # The label may come back wrapped in a list; unwrap the first entry.
    if isinstance(text_lab, list):
        text_lab = text_lab[0]

    # Map the classifier's short labels to the displayed categories.
    emotion_mapping = {
        'neu': 'Neutral',
        'ang': 'Angry',
        'hap': 'Happy',
        'sad': 'Sadness',
    }
    return emotion_mapping.get(text_lab, 'Unknown')
# Audio tab: the user types the path of an audio file, which is passed to
# `emotion`; the resulting category is shown in a text box.
audio_model = gr.Interface(fn=emotion, inputs="textbox", outputs="textbox")
# Combine the three interfaces into one tabbed app; the tab titles are
# given in the same order as the interfaces.
main_model = gr.TabbedInterface([text_model, image_mode,audio_model], ["Text Emotion Recognition", "Image Emotion Recognition" , "Audio Emotion Recognition"])
# Launch the tabbed app only when this file is run as a script.
# The original guard compared `_name_` with "_main_" (single underscores),
# which raises NameError because no such name exists.
if __name__ == "__main__":
    main_model.launch()