capradeepgujaran committed
Commit 519704e
Parent: 771e08a

Update app.py

Files changed (1): app.py (+172, −173)

app.py CHANGED
@@ -10,13 +10,14 @@ import base64
 
 class SafetyMonitor:
     def __init__(self):
+        """Initialize Safety Monitor with configuration."""
         self.client = Groq()
         self.model_name = "llama-3.2-90b-vision-preview"
         self.max_image_size = (800, 800)
         self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
 
     def preprocess_image(self, frame):
-        """Prepare image for analysis."""
+        """Process image for analysis."""
         if len(frame.shape) == 2:
             frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
         elif len(frame.shape) == 3 and frame.shape[2] == 4:
@@ -46,8 +47,8 @@ class SafetyMonitor:
         img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
         return f"data:image/jpeg;base64,{img_base64}"
 
-    def get_scene_context(self, image: np.ndarray) -> str:
-        """Get scene understanding to determine context."""
+    def get_scene_context(self, image):
+        """Analyze the scene context."""
         try:
             image_url = self.encode_image(image)
             completion = self.client.chat.completions.create(
@@ -58,15 +59,15 @@ class SafetyMonitor:
                         "content": [
                             {
                                 "type": "text",
-                                "text": """Describe the key areas and elements visible in this construction/workplace image. Include:
+                                "text": """Analyze this workplace image and identify key areas and elements. Include:
                                 1. Worker locations and activities
-                                2. Equipment and machinery positions
-                                3. Material storage or work areas
-                                4. Environmental features
-                                5. Access ways and pathways
-
-                                Format as:
-                                - Element: precise location description"""
+                                2. Equipment and machinery
+                                3. Materials and storage
+                                4. Access routes and paths
+                                5. Hazardous areas
+
+                                Format each observation as:
+                                - Element: specific location in image"""
                             },
                             {
                                 "type": "image_url",
@@ -86,174 +87,172 @@ class SafetyMonitor:
             print(f"Scene analysis error: {str(e)}")
             return ""
 
-    def analyze_frame(self, frame: np.ndarray) -> tuple[str, dict]:
-        """Analyze frame and return both safety analysis and scene context."""
-        if frame is None:
-            return "No frame received", {}
-
-        # First get scene understanding
-        scene_context = self.get_scene_context(frame)
-        scene_regions = self.parse_scene_context(scene_context)
-
-        # Then perform safety analysis with context
-        frame = self.preprocess_image(frame)
-        image_url = self.encode_image(frame)
-
-        try:
-            completion = self.client.chat.completions.create(
-                model=self.model_name,
-                messages=[
-                    {
-                        "role": "user",
-                        "content": [
-                            {
-                                "type": "text",
-                                "text": """Analyze this workplace image for safety concerns. For each identified hazard:
-                                1. Specify the exact location where the hazard exists
-                                2. Describe the specific safety issue
-                                3. Note any violations or risks
-
-                                Format each observation exactly as:
-                                - <location>area:detailed hazard description</location>
-
-                                Consider all safety aspects:
-                                - PPE compliance
-                                - Ergonomic risks
-                                - Equipment safety
-                                - Environmental hazards
-                                - Material handling
-                                - Access/egress
-                                - Work procedures
-                                """
-                            },
-                            {
-                                "type": "image_url",
-                                "image_url": {
-                                    "url": image_url
-                                }
-                            }
-                        ]
-                    }
-                ],
-                temperature=0.5,
-                max_tokens=500,
-                stream=False
-            )
-            return completion.choices[0].message.content, scene_regions
-        except Exception as e:
-            print(f"Analysis error: {str(e)}")
-            return f"Analysis Error: {str(e)}", scene_regions
-
-    def parse_scene_context(self, context: str) -> dict:
-        """Parse scene context to get region mapping."""
-        regions = {}
-        for line in context.split('\n'):
-            if line.strip().startswith('-'):
-                parts = line.strip('- ').split(':')
-                if len(parts) == 2:
-                    element_type = parts[0].strip()
-                    location = parts[1].strip()
-                    regions[element_type] = location
-        return regions
-
-    def get_region_coordinates(self, location: str, image_shape: tuple) -> tuple:
-        """Convert location description to coordinates."""
-        height, width = image_shape[:2]
-
-        # Parse location description for spatial information
-        location = location.lower()
-        x1, y1, x2, y2 = 0, 0, width, height  # Default to full image
-
-        # Horizontal position
-        if 'left' in location:
-            x2 = width // 2
-        elif 'right' in location:
-            x1 = width // 2
-        elif 'center' in location:
-            x1 = width // 4
-            x2 = 3 * width // 4
-
-        # Vertical position
-        if 'top' in location:
-            y2 = height // 2
-        elif 'bottom' in location:
-            y1 = height // 2
-        elif 'middle' in location or 'center' in location:
-            y1 = height // 4
-            y2 = 3 * height // 4
-
-        return (x1, y1, x2, y2)
-
-    def draw_observations(self, image: np.ndarray, observations: list, scene_regions: dict) -> np.ndarray:
-        """Draw safety observations using scene context."""
-        height, width = image.shape[:2]
-        font = cv2.FONT_HERSHEY_SIMPLEX
-        font_scale = 0.5
-        thickness = 2
-        padding = 10
-
-        for idx, obs in enumerate(observations):
-            color = self.colors[idx % len(self.colors)]
-
-            # Find best matching region from scene context or parse location directly
-            location = obs['location'].lower()
-            x1, y1, x2, y2 = self.get_region_coordinates(location, image.shape)
-
-            # Draw observation box
-            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
-
-            # Add label
-            label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
-            label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
-
-            # Position text above the box
-            text_x = max(0, x1)
-            text_y = max(label_size[1] + padding, y1 - padding)
-
-            # Draw text background
-            cv2.rectangle(image,
-                          (text_x, text_y - label_size[1] - padding),
-                          (text_x + label_size[0] + padding, text_y),
-                          color, -1)
-
-            # Draw text
-            cv2.putText(image, label,
-                        (text_x + padding//2, text_y - padding//2),
-                        font, font_scale, (255, 255, 255), thickness)
-
-        return image
-
-    def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
-        """Process frame with safety analysis and visualization."""
-        if frame is None:
-            return None, "No image provided"
-
-        # Get analysis and scene context
-        analysis, scene_regions = self.analyze_frame(frame)
-        display_frame = frame.copy()
-
-        # Parse observations
-        observations = []
-        for line in analysis.split('\n'):
-            line = line.strip()
-            if line.startswith('-') and '<location>' in line and '</location>' in line:
-                start = line.find('<location>') + len('<location>')
-                end = line.find('</location>')
-                location_description = line[start:end].strip()
-
-                if ':' in location_description:
-                    location, description = location_description.split(':', 1)
-                    observations.append({
-                        'location': location.strip(),
-                        'description': description.strip()
-                    })
-
-        # Draw observations if any were found
-        if observations:
-            annotated_frame = self.draw_observations(display_frame, observations, scene_regions)
-            return annotated_frame, analysis
-
-        return display_frame, analysis
+    def analyze_frame(self, frame):
+        """Perform safety analysis on the frame."""
+        if frame is None:
+            return "No frame received", {}
+
+        frame = self.preprocess_image(frame)
+        image_url = self.encode_image(frame)
+
+        try:
+            completion = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": """Analyze this image for safety hazards. For each hazard:
+                                1. Specify the precise location in the image
+                                2. Describe the safety concern or violation
+                                3. Indicate the potential risk
+
+                                Format each finding as:
+                                - <location>position:detailed safety concern</location>
+
+                                Look for all types of safety issues:
+                                - PPE compliance
+                                - Ergonomic risks
+                                - Equipment safety
+                                - Environmental hazards
+                                - Material handling
+                                - Work procedures
+                                - Access and egress
+                                - Housekeeping"""
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": image_url
+                                }
+                            }
+                        ]
+                    }
+                ],
+                temperature=0.5,
+                max_tokens=500,
+                stream=False
+            )
+            return completion.choices[0].message.content, {}
+        except Exception as e:
+            print(f"Analysis error: {str(e)}")
+            return f"Analysis Error: {str(e)}", {}
+
+    def get_region_coordinates(self, position, image_shape):
+        """Convert textual position to coordinates."""
+        height, width = image_shape[:2]
+
+        # Parse position for spatial information
+        position = position.lower()
+
+        # Base coordinates (full image)
+        x1, y1, x2, y2 = 0, 0, width, height
+
+        # Define regions
+        regions = {
+            'center': (width//3, height//3, 2*width//3, 2*height//3),
+            'top': (width//3, 0, 2*width//3, height//3),
+            'bottom': (width//3, 2*height//3, 2*width//3, height),
+            'left': (0, height//3, width//3, 2*height//3),
+            'right': (2*width//3, height//3, width, 2*height//3),
+            'top-left': (0, 0, width//3, height//3),
+            'top-right': (2*width//3, 0, width, height//3),
+            'bottom-left': (0, 2*height//3, width//3, height),
+            'bottom-right': (2*width//3, 2*height//3, width, height),
+            'upper': (0, 0, width, height//2),
+            'lower': (0, height//2, width, height),
+            'middle': (0, height//3, width, 2*height//3)
+        }
+
+        # Find best matching region
+        best_match = None
+        max_match = 0
+        for region, coords in regions.items():
+            if region in position:
+                words = region.split('-')
+                matches = sum(1 for word in words if word in position)
+                if matches > max_match:
+                    max_match = matches
+                    best_match = coords
+
+        return best_match if best_match else (x1, y1, x2, y2)
+
+    def draw_observations(self, image, observations):
+        """Draw bounding boxes and labels for safety observations."""
+        height, width = image.shape[:2]
+        font = cv2.FONT_HERSHEY_SIMPLEX
+        font_scale = 0.5
+        thickness = 2
+        padding = 10
+
+        for idx, obs in enumerate(observations):
+            color = self.colors[idx % len(self.colors)]
+
+            # Get coordinates for this observation
+            x1, y1, x2, y2 = self.get_region_coordinates(obs['location'], image.shape)
+
+            # Draw rectangle
+            cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
+
+            # Add label with background
+            label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
+            label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
+
+            # Position text above the box
+            text_x = max(0, x1)
+            text_y = max(label_size[1] + padding, y1 - padding)
+
+            # Draw text background
+            cv2.rectangle(image,
+                          (text_x, text_y - label_size[1] - padding),
+                          (text_x + label_size[0] + padding, text_y),
+                          color, -1)
+
+            # Draw text
+            cv2.putText(image, label,
+                        (text_x + padding//2, text_y - padding//2),
+                        font, font_scale, (255, 255, 255), thickness)
+
+        return image
+
+    def process_frame(self, frame):
+        """Main processing pipeline for safety analysis."""
+        if frame is None:
+            return None, "No image provided"
+
+        try:
+            # Get analysis
+            analysis, _ = self.analyze_frame(frame)
+            display_frame = frame.copy()
+
+            # Parse observations
+            observations = []
+            for line in analysis.split('\n'):
+                line = line.strip()
+                if line.startswith('-') and '<location>' in line and '</location>' in line:
+                    start = line.find('<location>') + len('<location>')
+                    end = line.find('</location>')
+                    location_description = line[start:end].strip()
+
+                    if ':' in location_description:
+                        location, description = location_description.split(':', 1)
+                        observations.append({
+                            'location': location.strip(),
+                            'description': description.strip()
+                        })
+
+            # Draw observations
+            if observations:
+                annotated_frame = self.draw_observations(display_frame, observations)
+                return annotated_frame, analysis
+
+            return display_frame, analysis
+
+        except Exception as e:
+            print(f"Processing error: {str(e)}")
+            return None, f"Error processing image: {str(e)}"
 
 def create_monitor_interface():
     monitor = SafetyMonitor()
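
For reference, a minimal standalone sketch of the 3x3 region grid this commit introduces in get_region_coordinates, with a subset of the region table; the (600, 800) frame shape and the position strings below are illustrative assumptions, not part of the commit:

# Sketch of the new region-grid lookup, reproduced without the Groq/OpenCV
# dependencies so it can be run on its own. Test inputs are illustrative.
def get_region_coordinates(position, image_shape):
    """Map a textual position such as 'top-left' to a pixel box."""
    height, width = image_shape[:2]
    position = position.lower()
    regions = {
        'center': (width // 3, height // 3, 2 * width // 3, 2 * height // 3),
        'top': (width // 3, 0, 2 * width // 3, height // 3),
        'bottom': (width // 3, 2 * height // 3, 2 * width // 3, height),
        'left': (0, height // 3, width // 3, 2 * height // 3),
        'right': (2 * width // 3, height // 3, width, 2 * height // 3),
        'top-left': (0, 0, width // 3, height // 3),
    }
    best_match, max_match = None, 0
    for region, coords in regions.items():
        if region in position:
            # Hyphenated names ('top-left') outscore single words ('top').
            matches = sum(1 for word in region.split('-') if word in position)
            if matches > max_match:
                max_match, best_match = matches, coords
    # Fall back to the full frame when no region name matches.
    return best_match if best_match else (0, 0, width, height)

print(get_region_coordinates('worker in top-left area', (600, 800)))   # (0, 0, 266, 200)
print(get_region_coordinates('no recognizable position', (600, 800)))  # (0, 0, 800, 600)

The returned box is what the new draw_observations uses to place each rectangle and label parsed from the model's <location>position:description</location> findings, with the full frame as the fallback when the model's wording matches no region name.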