Spaces:

AffordableAI
/

Real_Time_Safety_Monitoring

Running

App Files Files Community

Real_Time_Safety_Monitoring / app.py

capradeepgujaran

Update app.py

771e08a verified about 1 month ago

raw

history blame

12.4 kB

	import gradio as gr
	import cv2
	import numpy as np
	from groq import Groq
	import time
	from PIL import Image as PILImage
	import io
	import os
	import base64

	class SafetyMonitor:
	    """Analyze workplace images for safety hazards with a Groq vision model.

	    The monitor sends an image to the model twice (scene description and
	    hazard list), parses the hazard list into location/description pairs and
	    draws a coarse bounding box plus label for each hazard on a copy of the
	    input image.
	    """

	    # Prompt used by get_scene_context(); the text is sent to the model verbatim.
	    _SCENE_PROMPT = """Describe the key areas and elements visible in this construction/workplace image. Include:
	1. Worker locations and activities
	2. Equipment and machinery positions
	3. Material storage or work areas
	4. Environmental features
	5. Access ways and pathways

	Format as:
	- Element: precise location description"""

	    # Prompt used by analyze_frame(); process_frame() relies on the
	    # "- <location>area:description</location>" format requested here.
	    _SAFETY_PROMPT = """Analyze this workplace image for safety concerns. For each identified hazard:
	1. Specify the exact location where the hazard exists
	2. Describe the specific safety issue
	3. Note any violations or risks

	Format each observation exactly as:
	- <location>area:detailed hazard description</location>

	Consider all safety aspects:
	- PPE compliance
	- Ergonomic risks
	- Equipment safety
	- Environmental hazards
	- Material handling
	- Access/egress
	- Work procedures
	"""

	    def __init__(self):
	        # NOTE(review): Groq() is constructed without arguments; presumably it
	        # picks up its API key from the environment -- confirm for deployment.
	        self.client = Groq()
	        self.model_name = "llama-3.2-90b-vision-preview"
	        # (max_width, max_height) applied by resize_image().
	        self.max_image_size = (800, 800)
	        # Box/label colours, cycled per observation. The tuples are passed to
	        # OpenCV unchanged, so their meaning follows the image channel order.
	        self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]

	    def preprocess_image(self, frame):
	        """Return *frame* as a 3-channel image no larger than ``max_image_size``.

	        2-D (grayscale) and 4-channel (RGBA) arrays are converted to RGB; any
	        other layout is passed to resize_image() unchanged.
	        """
	        if len(frame.shape) == 2:
	            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
	        elif len(frame.shape) == 3 and frame.shape[2] == 4:
	            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)

	        return self.resize_image(frame)

	    def _target_size(self, width: int, height: int) -> tuple[int, int]:
	        """Return the ``(width, height)`` that fits inside ``max_image_size``.

	        The aspect ratio is preserved by scaling both sides with the same
	        factor. Each side is clamped to at least 1 pixel so that extreme
	        aspect ratios never yield an empty target size.
	        """
	        max_width, max_height = self.max_image_size
	        scale = min(max_width / width, max_height / height)
	        new_width = max(1, min(max_width, round(width * scale)))
	        new_height = max(1, min(max_height, round(height * scale)))
	        return new_width, new_height

	    def resize_image(self, image):
	        """Downscale *image* to fit ``max_image_size``, keeping its aspect ratio.

	        Images that already fit (or that have a zero-length side) are returned
	        unchanged, as the very same object.
	        """
	        height, width = image.shape[:2]
	        max_width, max_height = self.max_image_size
	        if not height or not width:
	            return image
	        if width <= max_width and height <= max_height:
	            return image
	        return cv2.resize(image, self._target_size(width, height), interpolation=cv2.INTER_AREA)

	    def encode_image(self, frame):
	        """Encode *frame* as a base64 JPEG ``data:`` URL."""
	        frame_pil = PILImage.fromarray(frame)
	        # JPEG cannot store an alpha channel (or most non-8-bit modes), so
	        # anything that is not plain RGB / grayscale is converted first.
	        if frame_pil.mode not in ("RGB", "L"):
	            frame_pil = frame_pil.convert("RGB")
	        buffered = io.BytesIO()
	        frame_pil.save(buffered, format="JPEG", quality=95)
	        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
	        return f"data:image/jpeg;base64,{img_base64}"

	    def _query_model(self, prompt: str, image_url: str, *, temperature: float, max_tokens: int) -> str:
	        """Send one text + image user message to the model and return its reply.

	        Exceptions raised by the client propagate to the caller. A missing
	        (``None``) reply is normalised to an empty string.
	        """
	        completion = self.client.chat.completions.create(
	            model=self.model_name,
	            messages=[
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": prompt},
	                        {"type": "image_url", "image_url": {"url": image_url}},
	                    ],
	                }
	            ],
	            temperature=temperature,
	            max_tokens=max_tokens,
	            stream=False,
	        )
	        return completion.choices[0].message.content or ""

	    def get_scene_context(self, image: np.ndarray) -> str:
	        """Ask the model for a bullet list describing the scene in *image*.

	        This is best effort: any failure is printed and an empty string is
	        returned so that the safety analysis can still proceed.
	        """
	        try:
	            image_url = self.encode_image(image)
	            return self._query_model(
	                self._SCENE_PROMPT, image_url, temperature=0.3, max_tokens=200
	            )
	        except Exception as e:
	            print(f"Scene analysis error: {str(e)}")
	            return ""

	    def analyze_frame(self, frame: np.ndarray) -> tuple[str, dict]:
	        """Run scene and safety analysis on *frame*.

	        Returns:
	            ``(analysis_text, scene_regions)``. When the safety request fails,
	            ``analysis_text`` is an ``"Analysis Error: ..."`` message. For a
	            ``None`` frame the result is ``("No frame received", {})``.
	        """
	        if frame is None:
	            return "No frame received", {}

	        # Normalise once, up front, so that both model calls receive the same
	        # RGB, size-limited image (previously the scene request was sent the
	        # raw frame).
	        frame = self.preprocess_image(frame)

	        scene_context = self.get_scene_context(frame)
	        scene_regions = self.parse_scene_context(scene_context)

	        # The scene regions are handed back to the caller; they are not
	        # injected into the safety prompt.
	        image_url = self.encode_image(frame)

	        try:
	            analysis = self._query_model(
	                self._SAFETY_PROMPT, image_url, temperature=0.5, max_tokens=500
	            )
	            return analysis, scene_regions
	        except Exception as e:
	            print(f"Analysis error: {str(e)}")
	            return f"Analysis Error: {str(e)}", scene_regions

	    def parse_scene_context(self, context: str) -> dict:
	        """Parse ``- Element: location`` bullet lines into a dict.

	        Only lines starting with ``-`` and containing a colon are used. The
	        line is split at the first colon, so a location description may itself
	        contain colons. Empty or ``None`` input yields an empty dict.
	        """
	        regions = {}
	        if not context:
	            return regions
	        for raw_line in context.split('\n'):
	            line = raw_line.strip()
	            if not line.startswith('-'):
	                continue
	            element_type, separator, location = line.strip('- ').partition(':')
	            if separator:
	                regions[element_type.strip()] = location.strip()
	        return regions

	    def get_region_coordinates(self, location: str, image_shape: tuple) -> tuple:
	        """Map a free-text location to a coarse ``(x1, y1, x2, y2)`` box.

	        The text is searched (case-insensitively, by substring) for
	        left/right/center and top/bottom/middle/center. Without a keyword on
	        an axis, the box spans the full image along that axis.
	        """
	        height, width = image_shape[:2]

	        location = location.lower()
	        x1, y1, x2, y2 = 0, 0, width, height  # Default to full image

	        # Horizontal position
	        if 'left' in location:
	            x2 = width // 2
	        elif 'right' in location:
	            x1 = width // 2
	        elif 'center' in location:
	            x1 = width // 4
	            x2 = 3 * width // 4

	        # Vertical position
	        if 'top' in location:
	            y2 = height // 2
	        elif 'bottom' in location:
	            y1 = height // 2
	        elif 'middle' in location or 'center' in location:
	            y1 = height // 4
	            y2 = 3 * height // 4

	        return (x1, y1, x2, y2)

	    def draw_observations(self, image: np.ndarray, observations: list, scene_regions: dict) -> np.ndarray:
	        """Draw a box and a text label on *image* for every observation.

	        *image* is modified in place and also returned. Each observation is a
	        dict with ``'location'`` and ``'description'`` keys. *scene_regions* is
	        accepted for interface compatibility but is not used for placement.
	        """
	        width = image.shape[1]
	        font = cv2.FONT_HERSHEY_SIMPLEX
	        font_scale = 0.5
	        thickness = 2
	        padding = 10

	        for idx, obs in enumerate(observations):
	            color = self.colors[idx % len(self.colors)]

	            x1, y1, x2, y2 = self.get_region_coordinates(obs['location'], image.shape)

	            # Draw observation box
	            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

	            # Labels are capped at 50 characters to keep them readable.
	            description = obs['description']
	            label = description[:50] + "..." if len(description) > 50 else description
	            (label_width, label_height), _ = cv2.getTextSize(label, font, font_scale, thickness)

	            # Anchor the label at the box's left edge, but shift it left when
	            # it would otherwise run past the right border of the image.
	            text_x = max(0, min(x1, width - label_width - padding))
	            # Keep the label fully visible when the box touches the top edge.
	            text_y = max(label_height + padding, y1 - padding)

	            # Draw text background
	            cv2.rectangle(image,
	                          (text_x, text_y - label_height - padding),
	                          (text_x + label_width + padding, text_y),
	                          color, -1)

	            # Draw text
	            cv2.putText(image, label,
	                        (text_x + padding // 2, text_y - padding // 2),
	                        font, font_scale, (255, 255, 255), thickness)

	        return image

	    @staticmethod
	    def _parse_observations(analysis: str) -> list:
	        """Extract ``{'location', 'description'}`` dicts from the model reply.

	        A line is used only when it starts with ``-`` and contains a
	        ``<location>area:description</location>`` element; the element text is
	        split at its first colon.
	        """
	        open_tag, close_tag = '<location>', '</location>'
	        observations = []
	        for line in analysis.split('\n'):
	            line = line.strip()
	            if not (line.startswith('-') and open_tag in line and close_tag in line):
	                continue
	            start = line.find(open_tag) + len(open_tag)
	            end = line.find(close_tag)
	            location, separator, description = line[start:end].strip().partition(':')
	            if separator:
	                observations.append({
	                    'location': location.strip(),
	                    'description': description.strip()
	                })
	        return observations

	    def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
	        """Analyze *frame* and return ``(annotated_copy, analysis_text)``.

	        The input array is never modified; annotations are drawn on a copy.
	        For a ``None`` frame the result is ``(None, "No image provided")``.
	        """
	        if frame is None:
	            return None, "No image provided"

	        # Get analysis and scene context
	        analysis, scene_regions = self.analyze_frame(frame)
	        display_frame = frame.copy()

	        observations = self._parse_observations(analysis)

	        # Draw observations if any were found
	        if observations:
	            annotated_frame = self.draw_observations(display_frame, observations, scene_regions)
	            return annotated_frame, analysis

	        return display_frame, analysis

	def create_monitor_interface():
	    """Build the Gradio Blocks app for uploading and analyzing an image.

	    The returned Blocks object shows the uploaded image next to the annotated
	    result, with the model's textual analysis underneath. Analysis is run
	    every time the input image changes.
	    """
	    monitor = SafetyMonitor()

	    def analyze_image(image):
	        # Callback for the input image's change event; never raises, so the
	        # UI always receives an (image, text) pair.
	        if image is None:
	            return None, "No image provided"
	        try:
	            annotated, report = monitor.process_frame(image)
	        except Exception as e:
	            print(f"Processing error: {str(e)}")
	            return None, f"Error processing image: {str(e)}"
	        return annotated, report

	    with gr.Blocks() as app:
	        gr.Markdown("# Safety Analysis System powered by Llama 3.2 90b vision")

	        # Input and annotated output side by side.
	        with gr.Row():
	            source_image = gr.Image(label="Upload Image")
	            result_image = gr.Image(label="Safety Analysis")

	        report_box = gr.Textbox(label="Detailed Analysis", lines=5)

	        source_image.change(
	            fn=analyze_image,
	            inputs=source_image,
	            outputs=[result_image, report_box],
	        )

	        gr.Markdown("""
	## Instructions:
	1. Upload any workplace/safety-related image
	2. View identified hazards and their locations
	3. Read detailed analysis of safety concerns
	""")

	    return app

	if __name__ == "__main__":
	    # Build the interface and start serving it only when run as a script.
	    demo = create_monitor_interface()
	    demo.launch()