capradeepgujaran committed on
Commit 771e08a
1 Parent(s): 46e12d1

Update app.py

Files changed (1)
  1. app.py +197 -161
app.py CHANGED
@@ -8,20 +8,94 @@ import io
 import os
 import base64
 
-def create_monitor_interface():
-    api_key = os.getenv("GROQ_API_KEY")
-
-    class SafetyMonitor:
-        def __init__(self):
-            self.client = Groq()
-            self.model_name = "llama-3.2-90b-vision-preview"
-            self.max_image_size = (800, 800)
-            self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
+class SafetyMonitor:
+    def __init__(self):
+        self.client = Groq()
+        self.model_name = "llama-3.2-90b-vision-preview"
+        self.max_image_size = (800, 800)
+        self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
+
+    def preprocess_image(self, frame):
+        """Prepare image for analysis."""
+        if len(frame.shape) == 2:
+            frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
+        elif len(frame.shape) == 3 and frame.shape[2] == 4:
+            frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
+
+        return self.resize_image(frame)
+
+    def resize_image(self, image):
+        """Resize image while maintaining aspect ratio."""
+        height, width = image.shape[:2]
+        if height > self.max_image_size[1] or width > self.max_image_size[0]:
+            aspect = width / height
+            if width > height:
+                new_width = self.max_image_size[0]
+                new_height = int(new_width / aspect)
+            else:
+                new_height = self.max_image_size[1]
+                new_width = int(new_height * aspect)
+            return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+        return image
+
+    def encode_image(self, frame):
+        """Convert image to base64 encoding."""
+        frame_pil = PILImage.fromarray(frame)
+        buffered = io.BytesIO()
+        frame_pil.save(buffered, format="JPEG", quality=95)
+        img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
+        return f"data:image/jpeg;base64,{img_base64}"
 
-        def analyze_frame(self, frame: np.ndarray) -> str:
+    def get_scene_context(self, image: np.ndarray) -> str:
+        """Get scene understanding to determine context."""
+        try:
+            image_url = self.encode_image(image)
+            completion = self.client.chat.completions.create(
+                model=self.model_name,
+                messages=[
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "text",
+                                "text": """Describe the key areas and elements visible in this construction/workplace image. Include:
+1. Worker locations and activities
+2. Equipment and machinery positions
+3. Material storage or work areas
+4. Environmental features
+5. Access ways and pathways
+
+Format as:
+- Element: precise location description"""
+                            },
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": image_url
+                                }
+                            }
+                        ]
+                    }
+                ],
+                temperature=0.3,
+                max_tokens=200,
+                stream=False
+            )
+            return completion.choices[0].message.content
+        except Exception as e:
+            print(f"Scene analysis error: {str(e)}")
+            return ""
+
+    def analyze_frame(self, frame: np.ndarray) -> tuple[str, dict]:
+        """Analyze frame and return both safety analysis and scene context."""
         if frame is None:
-            return "No frame received"
-
+            return "No frame received", {}
+
+        # First get scene understanding
+        scene_context = self.get_scene_context(frame)
+        scene_regions = self.parse_scene_context(scene_context)
+
+        # Then perform safety analysis with context
         frame = self.preprocess_image(frame)
         image_url = self.encode_image(frame)
 
@@ -34,32 +108,23 @@ def create_monitor_interface():
                         "content": [
                             {
                                 "type": "text",
-                                "text": """Analyze this image for safety hazards and issues. For each identified hazard:
-
-1. Specify the exact location in the image where the hazard exists
-2. Describe the specific safety concern
-3. Note any violations or risks
-
-Format each observation exactly as:
-- <location>area:hazard description</location>
-
-Examples of locations: top-left, center, bottom-right, full-area, near-machine, workspace, etc.
-
-Look for ALL types of safety issues including:
-- Personal protective equipment (PPE)
-- Machine and equipment hazards
-- Ergonomic risks
-- Environmental hazards
-- Fire and electrical safety
-- Chemical safety
-- Fall protection
-- Material handling
-- Access/egress issues
-- Housekeeping
-- Tool safety
-- Emergency equipment
-
-Be specific about locations and provide detailed observations."""
+                                "text": """Analyze this workplace image for safety concerns. For each identified hazard:
+1. Specify the exact location where the hazard exists
+2. Describe the specific safety issue
+3. Note any violations or risks
+
+Format each observation exactly as:
+- <location>area:detailed hazard description</location>
+
+Consider all safety aspects:
+- PPE compliance
+- Ergonomic risks
+- Equipment safety
+- Environmental hazards
+- Material handling
+- Access/egress
+- Work procedures
+"""
                             },
                             {
                                 "type": "image_url",
@@ -74,154 +139,123 @@ Be specific about locations and provide detailed observations."""
                 max_tokens=500,
                 stream=False
             )
-            return completion.choices[0].message.content
+            return completion.choices[0].message.content, scene_regions
         except Exception as e:
             print(f"Analysis error: {str(e)}")
-            return f"Analysis Error: {str(e)}"
-
-        def preprocess_image(self, frame):
-            """Prepare image for analysis."""
-            if len(frame.shape) == 2:
-                frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
-            elif len(frame.shape) == 3 and frame.shape[2] == 4:
-                frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
-
-            return self.resize_image(frame)
-
-        def resize_image(self, image):
-            """Resize image while maintaining aspect ratio."""
-            height, width = image.shape[:2]
-            if height > self.max_image_size[1] or width > self.max_image_size[0]:
-                aspect = width / height
-                if width > height:
-                    new_width = self.max_image_size[0]
-                    new_height = int(new_width / aspect)
-                else:
-                    new_height = self.max_image_size[1]
-                    new_width = int(new_height * aspect)
-                return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
-            return image
+            return f"Analysis Error: {str(e)}", scene_regions
 
-        def encode_image(self, frame):
-            """Convert image to base64 encoding."""
-            frame_pil = PILImage.fromarray(frame)
-            buffered = io.BytesIO()
-            frame_pil.save(buffered, format="JPEG", quality=95)
-            img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
-            return f"data:image/jpeg;base64,{img_base64}"
-
-        def parse_locations(self, observation: str) -> dict:
-            """Parse location information from observation."""
-            locations = {
-                'full': (0, 0, 1, 1),
-                'top': (0.2, 0, 0.8, 0.3),
-                'bottom': (0.2, 0.7, 0.8, 1),
-                'left': (0, 0.2, 0.3, 0.8),
-                'right': (0.7, 0.2, 1, 0.8),
-                'center': (0.3, 0.3, 0.7, 0.7),
-                'top-left': (0, 0, 0.3, 0.3),
-                'top-right': (0.7, 0, 1, 0.3),
-                'bottom-left': (0, 0.7, 0.3, 1),
-                'bottom-right': (0.7, 0.7, 1, 1),
-                'workspace': (0.2, 0.2, 0.8, 0.8),
-                'near-machine': (0.6, 0.1, 1, 0.9),
-                'floor-area': (0, 0.7, 1, 1),
-                'equipment': (0.5, 0.1, 1, 0.9)
-            }
-
-            # Find best matching location
-            text = observation.lower()
-            best_match = 'center'
-            max_match = 0
+    def parse_scene_context(self, context: str) -> dict:
+        """Parse scene context to get region mapping."""
+        regions = {}
+        for line in context.split('\n'):
+            if line.strip().startswith('-'):
+                parts = line.strip('- ').split(':')
+                if len(parts) == 2:
+                    element_type = parts[0].strip()
+                    location = parts[1].strip()
+                    regions[element_type] = location
+        return regions
+
+    def get_region_coordinates(self, location: str, image_shape: tuple) -> tuple:
+        """Convert location description to coordinates."""
+        height, width = image_shape[:2]
 
-            for loc in locations.keys():
-                if loc in text:
-                    words = loc.split('-')
-                    matches = sum(1 for word in words if word in text)
-                    if matches > max_match:
-                        max_match = matches
-                        best_match = loc
+        # Parse location description for spatial information
+        location = location.lower()
+        x1, y1, x2, y2 = 0, 0, width, height  # Default to full image
 
-            return locations[best_match]
-
-        def draw_observations(self, image, observations):
-            """Draw bounding boxes and labels for safety observations."""
+        # Horizontal position
+        if 'left' in location:
+            x2 = width // 2
+        elif 'right' in location:
+            x1 = width // 2
+        elif 'center' in location:
+            x1 = width // 4
+            x2 = 3 * width // 4
+
+        # Vertical position
+        if 'top' in location:
+            y2 = height // 2
+        elif 'bottom' in location:
+            y1 = height // 2
+        elif 'middle' in location or 'center' in location:
+            y1 = height // 4
+            y2 = 3 * height // 4
+
+        return (x1, y1, x2, y2)
+
+    def draw_observations(self, image: np.ndarray, observations: list, scene_regions: dict) -> np.ndarray:
+        """Draw safety observations using scene context."""
         height, width = image.shape[:2]
         font = cv2.FONT_HERSHEY_SIMPLEX
         font_scale = 0.5
        thickness = 2
         padding = 10
-
+
         for idx, obs in enumerate(observations):
             color = self.colors[idx % len(self.colors)]
 
-            # Get relative coordinates and convert to absolute
-            rel_coords = self.parse_locations(obs['location'])
-            x1 = int(rel_coords[0] * width)
-            y1 = int(rel_coords[1] * height)
-            x2 = int(rel_coords[2] * width)
-            y2 = int(rel_coords[3] * height)
+            # Find best matching region from scene context or parse location directly
+            location = obs['location'].lower()
+            x1, y1, x2, y2 = self.get_region_coordinates(location, image.shape)
 
-            # Draw rectangle
+            # Draw observation box
             cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
 
-            # Prepare label
-            label = obs['description'][:50]
-            if len(obs['description']) > 50:
-                label += "..."
-
-            # Calculate text position
+            # Add label
+            label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
             label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
+
+            # Position text above the box
             text_x = max(0, x1)
             text_y = max(label_size[1] + padding, y1 - padding)
 
-            # Draw label background
+            # Draw text background
             cv2.rectangle(image,
-                         (text_x, text_y - label_size[1] - padding),
-                         (text_x + label_size[0] + padding, text_y),
-                         color, -1)
+                        (text_x, text_y - label_size[1] - padding),
+                        (text_x + label_size[0] + padding, text_y),
+                        color, -1)
 
-            # Draw label text
+            # Draw text
             cv2.putText(image, label,
                        (text_x + padding//2, text_y - padding//2),
                        font, font_scale, (255, 255, 255), thickness)
 
-            return image
+        return image
 
     def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
-        """Process frame and generate safety analysis with visualizations."""
-        if frame is None:
-            return None, "No image provided"
-
-        # Get analysis
-        analysis = self.analyze_frame(frame)
-        display_frame = frame.copy()
-
-        # Parse observations
-        observations = []
-        for line in analysis.split('\n'):
-            line = line.strip()
-            if line.startswith('-') and '<location>' in line and '</location>' in line:
-                start = line.find('<location>') + len('<location>')
-                end = line.find('</location>')
-                location_description = line[start:end].strip()
-
-                # Split location and description
-                if ':' in location_description:
-                    location, description = location_description.split(':', 1)
-                    observations.append({
-                        'location': location.strip(),
-                        'description': description.strip()
-                    })
-
-        # Draw observations if any were found
-        if observations:
-            annotated_frame = self.draw_observations(display_frame, observations)
-            return annotated_frame, analysis
-
-        return display_frame, analysis
+        """Process frame with safety analysis and visualization."""
+        if frame is None:
+            return None, "No image provided"
+
+        # Get analysis and scene context
+        analysis, scene_regions = self.analyze_frame(frame)
+        display_frame = frame.copy()
+
+        # Parse observations
+        observations = []
+        for line in analysis.split('\n'):
+            line = line.strip()
+            if line.startswith('-') and '<location>' in line and '</location>' in line:
+                start = line.find('<location>') + len('<location>')
+                end = line.find('</location>')
+                location_description = line[start:end].strip()
+
+                if ':' in location_description:
+                    location, description = location_description.split(':', 1)
+                    observations.append({
+                        'location': location.strip(),
+                        'description': description.strip()
+                    })
+
+        # Draw observations if any were found
+        if observations:
+            annotated_frame = self.draw_observations(display_frame, observations, scene_regions)
+            return annotated_frame, analysis
+
+        return display_frame, analysis
 
-    # Create interface
+def create_monitor_interface():
     monitor = SafetyMonitor()
 
     with gr.Blocks() as demo:
@@ -252,11 +286,13 @@ Be specific about locations and provide detailed observations."""
         gr.Markdown("""
        ## Instructions:
         1. Upload any workplace/safety-related image
-        2. View identified hazards and safety concerns
-        3. Check detailed analysis for recommendations
+        2. View identified hazards and their locations
+        3. Read detailed analysis of safety concerns
         """)
 
     return demo
 
-demo = create_monitor_interface()
-demo.launch()
+if __name__ == "__main__":
+    demo = create_monitor_interface()
+    demo.launch()
+
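
The two helpers this commit introduces for region mapping, parse_scene_context and get_region_coordinates, are pure string and integer operations, so their behavior can be checked without a Groq client or a GROQ_API_KEY. Below is a minimal standalone sketch that mirrors their logic; the sample scene description is invented for illustration and is not real model output.

def parse_scene_context(context: str) -> dict:
    """Turn '- Element: location' lines into an {element: location} map."""
    regions = {}
    for line in context.split('\n'):
        if line.strip().startswith('-'):
            parts = line.strip('- ').split(':')
            if len(parts) == 2:
                regions[parts[0].strip()] = parts[1].strip()
    return regions

def get_region_coordinates(location: str, width: int, height: int) -> tuple:
    """Map a textual location to a pixel box, defaulting to the full image."""
    location = location.lower()
    x1, y1, x2, y2 = 0, 0, width, height
    # Horizontal keywords narrow the x-range
    if 'left' in location:
        x2 = width // 2
    elif 'right' in location:
        x1 = width // 2
    elif 'center' in location:
        x1, x2 = width // 4, 3 * width // 4
    # Vertical keywords narrow the y-range
    if 'top' in location:
        y2 = height // 2
    elif 'bottom' in location:
        y1 = height // 2
    elif 'middle' in location or 'center' in location:
        y1, y2 = height // 4, 3 * height // 4
    return (x1, y1, x2, y2)

# Invented sample in the '- Element: location' format the scene prompt asks for
sample = """- Worker: bottom-left near scaffolding
- Forklift: center of the floor"""
for element, location in parse_scene_context(sample).items():
    print(element, get_region_coordinates(location, 800, 600))
# Worker (0, 300, 400, 600)     -> left half, bottom half
# Forklift (200, 150, 600, 450) -> middle band of the image

Note that app.py's get_region_coordinates takes a numpy image_shape tuple rather than separate width/height; the sketch flattens that for brevity. Words with no matching keyword ("near scaffolding") are simply ignored, and a location containing none of the known keywords falls back to the full-image box.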