Spaces:

eusholli
/

computer-vision-playground

Sleeping

Geoffrey Hollingworth

initial upload

3d3f535 6 months ago

4.4 kB

	import os
	os.environ['OPENCV_AVFOUNDATION_SKIP_AUTH'] = '1'

	import streamlit as st
	import cv2
	import numpy as np
	from transformers import pipeline
	from PIL import Image, ImageDraw
	from mtcnn import MTCNN

	# Emotion classifier: a Hugging Face image-classification pipeline built
	# from the "trpakov/vit-face-expression" checkpoint.
	emotion_pipeline = pipeline(
	    task="image-classification",
	    model="trpakov/vit-face-expression",
	)

	# Face detector used to locate faces before they are classified.
	mtcnn = MTCNN()

	# Function to analyze sentiment
	def analyze_sentiment(face):
	    """Return the label of the highest-scoring emotion for a face crop.

	    Args:
	        face: Image array in OpenCV's BGR channel order (it is converted
	            with ``cv2.COLOR_BGR2RGB`` before classification).

	    Returns:
	        The ``'label'`` of the prediction with the largest ``'score'``
	        among the results returned by ``emotion_pipeline``.
	    """
	    def _score(prediction):
	        return prediction['score']

	    # The pipeline is fed a PIL image, so swap channels and wrap the array.
	    pil_face = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
	    predictions = emotion_pipeline(pil_face)
	    top_prediction = max(predictions, key=_score)
	    return top_prediction['label']

	# Font scale passed to OpenCV when measuring and drawing the emotion label.
	TEXT_SIZE = 3


	def detect_and_draw_faces(frame):
	    """Detect faces, classify each one's emotion, and annotate the frame.

	    For every face found by MTCNN a thick red rectangle is drawn around it
	    and the emotion label is written above it in white on a black
	    background. The drawing is done in place on ``frame``.

	    Args:
	        frame: Image array in OpenCV's BGR channel order.

	    Returns:
	        The same ``frame`` object, now carrying the annotations.
	    """
	    font = cv2.FONT_HERSHEY_SIMPLEX
	    thickness = 2
	    frame_h, frame_w = frame.shape[:2]

	    # MTCNN expects RGB input, whereas OpenCV frames are BGR; detect on a
	    # converted copy and keep drawing on the original BGR frame.
	    detections = mtcnn.detect_faces(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

	    for detection in detections:
	        x, y, w, h = detection['box']

	        # Detected boxes may extend past the image border (including
	        # negative origins). Clamp them so the crop below is never built
	        # from a negative slice start, and skip boxes with no visible area.
	        x0, y0 = max(x, 0), max(y, 0)
	        x1, y1 = min(x + w, frame_w), min(y + h, frame_h)
	        if x1 <= x0 or y1 <= y0:
	            continue

	        # Classify before drawing so the box does not pollute the crop.
	        sentiment = analyze_sentiment(frame[y0:y1, x0:x1])
	        cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 0, 255), 10)  # Thick red box

	        # Place the label just above the box, but never above the top edge
	        # of the frame (otherwise it would be drawn out of view).
	        text_w, text_h = cv2.getTextSize(sentiment, font, TEXT_SIZE, thickness)[0]
	        text_x = x0
	        text_y = max(y0 - 10, text_h)
	        background_tl = (text_x, text_y - text_h)
	        background_br = (text_x + text_w, text_y + 5)

	        # Black filled rectangle as background, white text on top.
	        cv2.rectangle(frame, background_tl, background_br, (0, 0, 0), cv2.FILLED)
	        cv2.putText(frame, sentiment, (text_x, text_y), font, TEXT_SIZE,
	                    (255, 255, 255), thickness)

	    return frame

	# Function to capture video from webcam
	def video_stream():
	    """Yield frames from capture device 0 until a read fails.

	    Reports an error through ``st.error`` and stops if the device cannot
	    be opened or a frame cannot be read.

	    Yields:
	        Frames as returned by ``cv2.VideoCapture.read``.
	    """
	    video_capture = cv2.VideoCapture(0)
	    try:
	        if not video_capture.isOpened():
	            st.error("Error: Could not open video capture device.")
	            return

	        while True:
	            ret, frame = video_capture.read()
	            if not ret:
	                st.error("Error: Failed to read frame from video capture device.")
	                break
	            yield frame
	    finally:
	        # Runs on normal exhaustion, when the consumer stops iterating
	        # early (generator close), and when an exception propagates, so
	        # the device is always released.
	        video_capture.release()

	# Streamlit UI: page-wide CSS (background, heading colours/fonts, button
	# styling) injected as raw HTML.
	_PAGE_CSS = """
	<style>
	.main {
	background-color: #FFFFFF;
	}
	.reportview-container .main .block-container{
	padding-top: 2rem;
	}
	h1 {
	color: #E60012;
	font-family: 'Arial Black', Gadget, sans-serif;
	}
	h2 {
	color: #E60012;
	font-family: 'Arial', sans-serif;
	}
	h3 {
	color: #333333;
	font-family: 'Arial', sans-serif;
	}
	.stButton button {
	background-color: #E60012;
	color: white;
	border-radius: 5px;
	font-size: 16px;
	}
	</style>
	"""
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	st.title("Computer Vision Test Lab")
	st.subheader("Facial Sentiment")

	# Two side-by-side columns: the webcam feed on the left, the annotated
	# frames on the right. Each column gets a placeholder that is overwritten
	# with a new image on every frame.
	col1, col2 = st.columns(2)

	col1.header("Input Stream")
	col1.subheader("Webcam")
	video_placeholder = col1.empty()

	col2.header("Output Stream")
	col2.subheader("Analysis")
	output_placeholder = col2.empty()

	# Extra placeholder below the columns.
	sentiment_placeholder = st.empty()

	# Start video stream: read frames from capture device 0 and show each one
	# twice, untouched in the input column and annotated in the output column.
	video_capture = cv2.VideoCapture(0)
	if not video_capture.isOpened():
	    st.error("Error: Could not open video capture device.")
	else:
	    try:
	        while True:
	            ret, frame = video_capture.read()
	            if not ret:
	                st.error("Error: Failed to read frame from video capture device.")
	                break

	            # Display the raw input frame. This must happen before the
	            # detection step, which draws on the frame in place.
	            video_placeholder.image(frame, channels="BGR")

	            # Detect faces, analyze sentiment, and draw red boxes with
	            # sentiment labels.
	            frame_with_boxes = detect_and_draw_faces(frame)

	            # Display the annotated output frame.
	            output_placeholder.image(frame_with_boxes, channels="BGR")

	            # No cv2.waitKey() here: it only processes key events for an
	            # active HighGUI window, and this app never creates one, so
	            # the former 'q' check could not trigger.
	    finally:
	        # Free the camera even if the loop exits via an exception.
	        video_capture.release()