import os

os.environ['OPENCV_AVFOUNDATION_SKIP_AUTH'] = '1'

import time

import streamlit as st
import cv2
import numpy as np
from transformers import pipeline
from PIL import Image, ImageDraw
from mtcnn import MTCNN

# Initialize the Hugging Face pipeline for facial emotion detection
emotion_pipeline = pipeline("image-classification", model="trpakov/vit-face-expression")

# Initialize MTCNN for face detection
mtcnn = MTCNN()

TEXT_SIZE = 3


# Function to analyze sentiment
def analyze_sentiment(face):
    # Convert the BGR face crop to RGB
    rgb_face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
    # Convert the face to a PIL image
    pil_image = Image.fromarray(rgb_face)
    # Analyze sentiment using the Hugging Face pipeline
    results = emotion_pipeline(pil_image)
    # Get the dominant emotion
    dominant_emotion = max(results, key=lambda x: x['score'])['label']
    return dominant_emotion


# Function to detect faces, analyze sentiment, and draw a red box around them
def detect_and_draw_faces(frame):
    # MTCNN expects RGB input, so convert the BGR frame before detection
    results = mtcnn.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # Draw on the frame
    for result in results:
        x, y, w, h = result['box']
        # Clamp the box to the frame; MTCNN can return negative coordinates
        x, y = max(0, x), max(0, y)
        face = frame[y:y+h, x:x+w]
        if face.size == 0:
            continue
        sentiment = analyze_sentiment(face)
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 0, 255), 10)  # Thicker red box

        # Calculate position for the text background and the text itself
        text_size = cv2.getTextSize(sentiment, cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, 2)[0]
        text_x = x
        text_y = y - 10
        background_tl = (text_x, text_y - text_size[1])
        background_br = (text_x + text_size[0], text_y + 5)

        # Draw black rectangle as background
        cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
        # Draw white text on top
        cv2.putText(frame, sentiment, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, TEXT_SIZE, (255, 255, 255), 2)

    return frame


# Function to capture video from the webcam
# (currently unused; the main loop below reads the camera directly)
def video_stream():
    video_capture = cv2.VideoCapture(0)
    if not video_capture.isOpened():
        st.error("Error: Could not open video capture device.")
        return
    while True:
        ret, frame = video_capture.read()
        if not ret:
            st.error("Error: Failed to read frame from video capture device.")
            break
        yield frame
    video_capture.release()


# Streamlit UI
st.markdown(
    """
    """,
    unsafe_allow_html=True
)

st.title("Computer Vision Test Lab")
st.subheader("Facial Sentiment")

# Columns for input and output streams
col1, col2 = st.columns(2)

with col1:
    st.header("Input Stream")
    st.subheader("Webcam")
    video_placeholder = st.empty()

with col2:
    st.header("Output Stream")
    st.subheader("Analysis")
    output_placeholder = st.empty()
    sentiment_placeholder = st.empty()

# Start video stream
video_capture = cv2.VideoCapture(0)
if not video_capture.isOpened():
    st.error("Error: Could not open video capture device.")
else:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            st.error("Error: Failed to read frame from video capture device.")
            break

        # Display the raw input stream (before any annotation is drawn)
        video_placeholder.image(frame, channels="BGR")

        # Detect faces, analyze sentiment, and draw red boxes with sentiment labels
        frame_with_boxes = detect_and_draw_faces(frame)

        # Display the annotated output stream
        output_placeholder.image(frame_with_boxes, channels="BGR")

        # Short delay to control the frame rate; cv2.waitKey only works with an
        # OpenCV window, which a Streamlit app never opens, so sleep instead
        time.sleep(0.03)

    video_capture.release()
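
# --- Usage note (a minimal sketch; the filename "app.py" is an assumption, ---
# --- substitute whatever name this script is saved under)                  ---
#
# Streamlit apps are launched through the Streamlit CLI rather than the
# Python interpreter, so run:
#
#     streamlit run app.py
#
# The OPENCV_AVFOUNDATION_SKIP_AUTH environment variable set at the top is
# only relevant on macOS, where it skips OpenCV's AVFoundation camera
# permission prompt; it is harmless on other platforms.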