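"""Workplace safety analysis demo.

A Gradio app that sends an uploaded workplace image to a Groq-hosted
Llama 3.2 vision model, first for scene understanding and then for a
hazard analysis, and draws the reported hazards back onto the image.

Assumed setup (not stated in the source): the gradio, groq,
opencv-python, numpy, and pillow packages are installed, and the
GROQ_API_KEY environment variable is set.
"""
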
import gradio as gr
import cv2
import numpy as np
from groq import Groq
import time
from PIL import Image as PILImage
import io
import os
import base64

class SafetyMonitor:
    def __init__(self):
        self.client = Groq()
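        # Groq() reads the API key from the GROQ_API_KEY environment variable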
        self.model_name = "llama-3.2-90b-vision-preview"
        self.max_image_size = (800, 800)
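        # Colors cycled across observation boxes; note the image arrays
        # come from Gradio as RGB, so these tuples render as RGB values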
        self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]

    def preprocess_image(self, frame):
        """Prepare image for analysis."""
        if len(frame.shape) == 2:
            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
        elif len(frame.shape) == 3 and frame.shape[2] == 4:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
        
        return self.resize_image(frame)

    def resize_image(self, image):
        """Resize image while maintaining aspect ratio."""
        height, width = image.shape[:2]
        if height > self.max_image_size[1] or width > self.max_image_size[0]:
            aspect = width / height
            if width > height:
                new_width = self.max_image_size[0]
                new_height = int(new_width / aspect)
            else:
                new_height = self.max_image_size[1]
                new_width = int(new_height * aspect)
            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
        return image

    def encode_image(self, frame):
        """Convert image to base64 encoding."""
        frame_pil = PILImage.fromarray(frame)
        buffered = io.BytesIO()
        frame_pil.save(buffered, format="JPEG", quality=95)
        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
        return f"data:image/jpeg;base64,{img_base64}"

    def get_scene_context(self, image: np.ndarray) -> str:
        """Get scene understanding to determine context."""
        try:
            image_url = self.encode_image(image)
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """Describe the key areas and elements visible in this construction/workplace image. Include:
                                1. Worker locations and activities
                                2. Equipment and machinery positions
                                3. Material storage or work areas
                                4. Environmental features
                                5. Access ways and pathways

                                Format as:
                                - Element: precise location description"""
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": image_url
                                }
                            }
                        ]
                    }
                ],
                temperature=0.3,
                max_tokens=200,
                stream=False
            )
            return completion.choices[0].message.content
        except Exception as e:
            print(f"Scene analysis error: {str(e)}")
            return ""

    def analyze_frame(self, frame: np.ndarray) -> tuple[str, dict]:
        """Analyze frame and return both safety analysis and scene context."""
        if frame is None:
            return "No frame received", {}

        # Preprocess once up front so both model calls get the same
        # bounded RGB image (also handles grayscale/RGBA inputs before
        # JPEG encoding)
        frame = self.preprocess_image(frame)

        # First get scene understanding
        scene_context = self.get_scene_context(frame)
        scene_regions = self.parse_scene_context(scene_context)

        # Then perform safety analysis with context
        image_url = self.encode_image(frame)

        try:
            completion = self.client.chat.completions.create(
                model=self.model_name,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "text",
                                "text": """Analyze this workplace image for safety concerns. For each identified hazard:
                                1. Specify the exact location where the hazard exists
                                2. Describe the specific safety issue
                                3. Note any violations or risks

                                Format each observation exactly as:
                                - <location>area:detailed hazard description</location>

                                Consider all safety aspects:
                                - PPE compliance
                                - Ergonomic risks
                                - Equipment safety
                                - Environmental hazards
                                - Material handling
                                - Access/egress
                                - Work procedures
                                """
                            },
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": image_url
                                }
                            }
                        ]
                    }
                ],
                temperature=0.5,
                max_tokens=500,
                stream=False
            )
            return completion.choices[0].message.content, scene_regions
        except Exception as e:
            print(f"Analysis error: {str(e)}")
            return f"Analysis Error: {str(e)}", scene_regions

    def parse_scene_context(self, context: str) -> dict:
        """Parse scene context lines of the form "- Element: location
        description" into an {element: location} mapping."""
        regions = {}
        for line in context.split('\n'):
            if line.strip().startswith('-'):
                parts = line.strip('- ').split(':')
                if len(parts) == 2:
                    element_type = parts[0].strip()
                    location = parts[1].strip()
                    regions[element_type] = location
        return regions
    
    def get_region_coordinates(self, location: str, image_shape: tuple) -> tuple:
        """Convert a textual location description (e.g. "top left") into
        (x1, y1, x2, y2) pixel coordinates."""
        height, width = image_shape[:2]

        # Parse location description for spatial information
        location = location.lower()
        x1, y1, x2, y2 = 0, 0, width, height  # Default to full image

        # Horizontal position
        if 'left' in location:
            x2 = width // 2
        elif 'right' in location:
            x1 = width // 2
        elif 'center' in location:
            x1 = width // 4
            x2 = 3 * width // 4

        # Vertical position
        if 'top' in location:
            y2 = height // 2
        elif 'bottom' in location:
            y1 = height // 2
        elif 'middle' in location or 'center' in location:
            y1 = height // 4
            y2 = 3 * height // 4

        return (x1, y1, x2, y2)
    
    def draw_observations(self, image: np.ndarray, observations: list, scene_regions: dict) -> np.ndarray:
        """Draw safety observations as labeled boxes on the image."""
        height, width = image.shape[:2]
        font = cv2.FONT_HERSHEY_SIMPLEX
        font_scale = 0.5
        thickness = 2
        padding = 10

        for idx, obs in enumerate(observations):
            color = self.colors[idx % len(self.colors)]

            # The location text is parsed directly into a rectangular
            # region (scene_regions is accepted but not yet consulted)
            location = obs['location'].lower()
            x1, y1, x2, y2 = self.get_region_coordinates(location, image.shape)

            # Draw observation box
            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)

            # Add label, truncated to keep it readable
            label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
            label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)

            # Position text above the box
            text_x = max(0, x1)
            text_y = max(label_size[1] + padding, y1 - padding)

            # Draw text background
            cv2.rectangle(image,
                          (text_x, text_y - label_size[1] - padding),
                          (text_x + label_size[0] + padding, text_y),
                          color, -1)

            # Draw text
            cv2.putText(image, label,
                        (text_x + padding//2, text_y - padding//2),
                        font, font_scale, (255, 255, 255), thickness)

        return image

    def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
        """Process frame with safety analysis and visualization."""
        if frame is None:
            return None, "No image provided"

        # Get analysis and scene context
        analysis, scene_regions = self.analyze_frame(frame)
        display_frame = frame.copy()

        # Parse observations of the form:
        # - <location>area:detailed hazard description</location>
        observations = []
        for line in analysis.split('\n'):
            line = line.strip()
            if line.startswith('-') and '<location>' in line and '</location>' in line:
                start = line.find('<location>') + len('<location>')
                end = line.find('</location>')
                location_description = line[start:end].strip()

                if ':' in location_description:
                    location, description = location_description.split(':', 1)
                    observations.append({
                        'location': location.strip(),
                        'description': description.strip()
                    })

        # Draw observations if any were found
        if observations:
            annotated_frame = self.draw_observations(display_frame, observations, scene_regions)
            return annotated_frame, analysis

        return display_frame, analysis

def create_monitor_interface():
    monitor = SafetyMonitor()
    
    with gr.Blocks() as demo:
        gr.Markdown("# Safety Analysis System powered by Llama 3.2 90B Vision")
        
        with gr.Row():
            input_image = gr.Image(label="Upload Image")
            output_image = gr.Image(label="Safety Analysis")
        
        analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)
            
        def analyze_image(image):
            if image is None:
                return None, "No image provided"
            try:
                processed_frame, analysis = monitor.process_frame(image)
                return processed_frame, analysis
            except Exception as e:
                print(f"Processing error: {str(e)}")
                return None, f"Error processing image: {str(e)}"
            
        input_image.change(
            fn=analyze_image,
            inputs=input_image,
            outputs=[output_image, analysis_text]
        )

        gr.Markdown("""
        ## Instructions:
        1. Upload any workplace/safety-related image
        2. View identified hazards and their locations
        3. Read detailed analysis of safety concerns
        """)

    return demo

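# Example invocation (filename assumed; e.g. if this file is saved as app.py):
#   GROQ_API_KEY=<your-key> python app.py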
if __name__ == "__main__":
    demo = create_monitor_interface()
    demo.launch()