capradeepgujaran committed on
Commit
46e12d1
1 Parent(s): bda20be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -114
app.py CHANGED
@@ -17,41 +17,13 @@ def create_monitor_interface():
17
  self.model_name = "llama-3.2-90b-vision-preview"
18
  self.max_image_size = (800, 800)
19
  self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
20
-
21
- def resize_image(self, image):
22
- height, width = image.shape[:2]
23
- if height > self.max_image_size[1] or width > self.max_image_size[0]:
24
- aspect = width / height
25
- if width > height:
26
- new_width = self.max_image_size[0]
27
- new_height = int(new_width / aspect)
28
- else:
29
- new_height = self.max_image_size[1]
30
- new_width = int(new_height * aspect)
31
- return cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
32
- return image
33
 
34
  def analyze_frame(self, frame: np.ndarray) -> str:
35
  if frame is None:
36
  return "No frame received"
37
 
38
- # Convert and resize image
39
- if len(frame.shape) == 2:
40
- frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
41
- elif len(frame.shape) == 3 and frame.shape[2] == 4:
42
- frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
43
-
44
- frame = self.resize_image(frame)
45
- frame_pil = PILImage.fromarray(frame)
46
-
47
- # High quality image for better analysis
48
- buffered = io.BytesIO()
49
- frame_pil.save(buffered,
50
- format="JPEG",
51
- quality=95,
52
- optimize=True)
53
- img_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')
54
- image_url = f"data:image/jpeg;base64,{img_base64}"
55
 
56
  try:
57
  completion = self.client.chat.completions.create(
@@ -62,24 +34,32 @@ def create_monitor_interface():
62
  "content": [
63
  {
64
  "type": "text",
65
- "text": """Analyze this workplace image for safety conditions and hazards. Focus on:
 
 
 
 
66
 
67
- 1. Work posture and ergonomics
68
- 2. PPE and safety equipment usage
69
- 3. Tool handling and techniques
70
- 4. Environmental conditions
71
- 5. Equipment and machinery safety
72
- 6. Ground conditions and hazards
73
 
74
- Describe each safety condition observed, using this exact format:
75
- - <location>position</location>: detailed safety observation
76
 
77
- Examples:
78
- - <location>center</location>: Improper kneeling posture without knee protection, risking joint injury
79
- - <location>background</location>: Heavy machinery operating in close proximity creating hazard zone
80
- - <location>ground</location>: Uneven surface and debris creating trip hazards
 
 
 
 
 
 
 
 
 
81
 
82
- Be specific about locations and safety concerns."""
83
  },
84
  {
85
  "type": "image_url",
@@ -99,104 +79,149 @@ Be specific about locations and safety concerns."""
99
  print(f"Analysis error: {str(e)}")
100
  return f"Analysis Error: {str(e)}"
101
 
102
- def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
103
- if frame is None:
104
- return None, "No image provided"
105
-
106
- analysis = self.analyze_frame(frame)
107
- display_frame = frame.copy()
108
 
109
- # Parse observations from the formatted response
110
- observations = []
111
- lines = analysis.split('\n')
112
- for line in lines:
113
- if '<location>' in line and '</location>' in line:
114
- start = line.find('<location>') + len('<location>')
115
- end = line.find('</location>')
116
- location = line[start:end].strip()
117
-
118
- # Get the description that follows the location tags
119
- desc_start = line.find('</location>') + len('</location>:')
120
- description = line[desc_start:].strip()
121
-
122
- if location and description:
123
- observations.append({
124
- 'location': location,
125
- 'description': description
126
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
 
128
- # Draw observations if we found any
129
- if observations:
130
- annotated_frame = self.draw_observations(display_frame, observations)
131
- return annotated_frame, analysis
132
 
133
- return display_frame, analysis
 
 
 
 
 
 
 
 
134
 
135
  def draw_observations(self, image, observations):
136
- """Draw accurate bounding boxes based on safety issue locations."""
137
  height, width = image.shape[:2]
138
  font = cv2.FONT_HERSHEY_SIMPLEX
139
  font_scale = 0.5
140
  thickness = 2
141
  padding = 10
142
-
143
- def get_region_coordinates(position: str) -> tuple:
144
- """Get coordinates based on position description."""
145
- regions = {
146
- 'center': (width//3, height//3, 2*width//3, 2*height//3),
147
- 'background': (0, 0, width, height),
148
- 'top-left': (0, 0, width//3, height//3),
149
- 'top': (width//3, 0, 2*width//3, height//3),
150
- 'top-right': (2*width//3, 0, width, height//3),
151
- 'left': (0, height//3, width//3, 2*height//3),
152
- 'right': (2*width//3, height//3, width, 2*height//3),
153
- 'bottom-left': (0, 2*height//3, width//3, height),
154
- 'bottom': (width//3, 2*height//3, 2*width//3, height),
155
- 'bottom-right': (2*width//3, 2*height//3, width, height),
156
- 'ground': (0, 2*height//3, width, height),
157
- 'machinery': (0, 0, width//2, height),
158
- 'work-area': (width//4, height//4, 3*width//4, 3*height//4)
159
- }
160
-
161
- # Find best matching region
162
- position = position.lower()
163
- for key in regions.keys():
164
- if key in position:
165
- return regions[key]
166
-
167
- return regions['center']
168
-
169
  for idx, obs in enumerate(observations):
170
  color = self.colors[idx % len(self.colors)]
171
 
172
- # Get coordinates for this observation
173
- x1, y1, x2, y2 = get_region_coordinates(obs['location'])
 
 
 
 
174
 
175
  # Draw rectangle
176
  cv2.rectangle(image, (x1, y1), (x2, y2), color, 2)
177
 
178
- # Add label with background
179
- label = obs['description'][:50] + "..." if len(obs['description']) > 50 else obs['description']
180
- label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
 
181
 
182
- # Position text above the box
 
183
  text_x = max(0, x1)
184
  text_y = max(label_size[1] + padding, y1 - padding)
185
 
186
- # Draw text background
187
  cv2.rectangle(image,
188
  (text_x, text_y - label_size[1] - padding),
189
  (text_x + label_size[0] + padding, text_y),
190
  color, -1)
191
 
192
- # Draw text
193
  cv2.putText(image, label,
194
  (text_x + padding//2, text_y - padding//2),
195
  font, font_scale, (255, 255, 255), thickness)
196
 
197
  return image
198
 
199
- # Create the main interface
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  monitor = SafetyMonitor()
201
 
202
  with gr.Blocks() as demo:
@@ -204,7 +229,7 @@ Be specific about locations and safety concerns."""
204
 
205
  with gr.Row():
206
  input_image = gr.Image(label="Upload Image")
207
- output_image = gr.Image(label="Annotated Results")
208
 
209
  analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)
210
 
@@ -226,9 +251,9 @@ Be specific about locations and safety concerns."""
226
 
227
  gr.Markdown("""
228
  ## Instructions:
229
- 1. Upload an image to analyze safety conditions
230
- 2. View annotated results showing safety concerns
231
- 3. Read detailed analysis of identified issues
232
  """)
233
 
234
  return demo
 
17
  self.model_name = "llama-3.2-90b-vision-preview"
18
  self.max_image_size = (800, 800)
19
  self.colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 255, 0), (255, 0, 255)]
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def analyze_frame(self, frame: np.ndarray) -> str:
22
  if frame is None:
23
  return "No frame received"
24
 
25
+ frame = self.preprocess_image(frame)
26
+ image_url = self.encode_image(frame)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
 
28
  try:
29
  completion = self.client.chat.completions.create(
 
34
  "content": [
35
  {
36
  "type": "text",
37
+ "text": """Analyze this image for safety hazards and issues. For each identified hazard:
38
+
39
+ 1. Specify the exact location in the image where the hazard exists
40
+ 2. Describe the specific safety concern
41
+ 3. Note any violations or risks
42
 
43
+ Format each observation exactly as:
44
+ - <location>area:hazard description</location>
 
 
 
 
45
 
46
+ Examples of locations: top-left, center, bottom-right, full-area, near-machine, workspace, etc.
 
47
 
48
+ Look for ALL types of safety issues including:
49
+ - Personal protective equipment (PPE)
50
+ - Machine and equipment hazards
51
+ - Ergonomic risks
52
+ - Environmental hazards
53
+ - Fire and electrical safety
54
+ - Chemical safety
55
+ - Fall protection
56
+ - Material handling
57
+ - Access/egress issues
58
+ - Housekeeping
59
+ - Tool safety
60
+ - Emergency equipment
61
 
62
+ Be specific about locations and provide detailed observations."""
63
  },
64
  {
65
  "type": "image_url",
 
79
  print(f"Analysis error: {str(e)}")
80
  return f"Analysis Error: {str(e)}"
81
 
82
def preprocess_image(self, frame):
    """Coerce a frame to 3-channel RGB, then bound its size for analysis.

    Grayscale (2-D) frames are expanded to RGB and RGBA frames lose their
    alpha channel; anything already 3-channel passes through untouched.
    The result is handed to resize_image to enforce self.max_image_size.
    """
    ndim = len(frame.shape)
    if ndim == 2:
        # Single-channel (grayscale) input: replicate into RGB.
        frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2RGB)
    elif ndim == 3 and frame.shape[2] == 4:
        # RGBA input: discard the alpha channel.
        frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2RGB)
    return self.resize_image(frame)
90
+
91
def resize_image(self, image):
    """Downscale an image to fit within self.max_image_size.

    Aspect ratio is preserved; the longer side is pinned to its limit and
    the shorter side is derived from it. Images already within bounds are
    returned unchanged (same object, no copy).
    """
    max_w, max_h = self.max_image_size
    height, width = image.shape[:2]

    # Nothing to do when the frame already fits.
    if width <= max_w and height <= max_h:
        return image

    aspect = width / height
    if width > height:
        # Landscape: clamp width, derive height.
        target_w = max_w
        target_h = int(target_w / aspect)
    else:
        # Portrait/square: clamp height, derive width.
        target_h = max_h
        target_w = int(target_h * aspect)

    # INTER_AREA is the preferred interpolation for shrinking.
    return cv2.resize(image, (target_w, target_h), interpolation=cv2.INTER_AREA)
104
+
105
def encode_image(self, frame):
    """Serialize an RGB numpy frame as a base64 JPEG data URL.

    Returns a 'data:image/jpeg;base64,...' string suitable for passing as
    an image_url to the vision model API.
    """
    buffer = io.BytesIO()
    # Quality 95 keeps detail for the safety analysis while staying compact.
    PILImage.fromarray(frame).save(buffer, format="JPEG", quality=95)
    encoded = base64.b64encode(buffer.getvalue()).decode('utf-8')
    return f"data:image/jpeg;base64,{encoded}"
112
+
113
def parse_locations(self, observation: str) -> tuple:
    """Map a textual location description to a relative bounding box.

    Args:
        observation: Free-text location phrase produced by the model,
            e.g. "top-left", "near-machine", "floor-area debris".

    Returns:
        (x1, y1, x2, y2) as fractions of image width/height in [0, 1].
        Falls back to the 'center' region when nothing matches.

    Fixes over the previous version:
    - The return annotation said ``dict`` but a tuple is returned.
    - Matching used raw substring search, so 'top' matched inside words
      like 'stopped'; keys now only match whole tokens.
    """
    locations = {
        'full': (0, 0, 1, 1),
        'top': (0.2, 0, 0.8, 0.3),
        'bottom': (0.2, 0.7, 0.8, 1),
        'left': (0, 0.2, 0.3, 0.8),
        'right': (0.7, 0.2, 1, 0.8),
        'center': (0.3, 0.3, 0.7, 0.7),
        'top-left': (0, 0, 0.3, 0.3),
        'top-right': (0.7, 0, 1, 0.3),
        'bottom-left': (0, 0.7, 0.3, 1),
        'bottom-right': (0.7, 0.7, 1, 1),
        'workspace': (0.2, 0.2, 0.8, 0.8),
        'near-machine': (0.6, 0.1, 1, 0.9),
        'floor-area': (0, 0.7, 1, 1),
        'equipment': (0.5, 0.1, 1, 0.9)
    }

    # Tokenize on whitespace and strip surrounding punctuation so keys only
    # match whole words (hyphenated keys like 'top-left' stay one token).
    tokens = {tok.strip('.,:;!?()<>') for tok in observation.lower().split()}

    best_match = 'center'  # default region when nothing matches
    max_match = 0
    for loc in locations:
        if loc in tokens:
            # Prefer more specific multi-part keys, e.g. 'top-left' over 'top'.
            matches = len(loc.split('-'))
            if matches > max_match:
                max_match = matches
                best_match = loc

    return locations[best_match]
146
 
147
def draw_observations(self, image, observations):
    """Annotate the image with one labeled bounding box per observation.

    Each observation dict needs 'location' (mapped to a region via
    parse_locations) and 'description' (rendered as the box label).
    Draws in place and returns the same image array.
    """
    height, width = image.shape[:2]
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.5
    thickness = 2
    padding = 10

    for i, observation in enumerate(observations):
        # Cycle through the palette so each box gets a distinct color.
        box_color = self.colors[i % len(self.colors)]

        # Scale the relative region for this location to pixel coordinates.
        rx1, ry1, rx2, ry2 = self.parse_locations(observation['location'])
        x1, y1 = int(rx1 * width), int(ry1 * height)
        x2, y2 = int(rx2 * width), int(ry2 * height)

        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)

        # Truncate long descriptions so the label stays readable.
        description = observation['description']
        label = description[:50] + "..." if len(description) > 50 else description

        label_size, _ = cv2.getTextSize(label, font, font_scale, thickness)
        # Anchor the label just above the box, clamped inside the image.
        text_x = max(0, x1)
        text_y = max(label_size[1] + padding, y1 - padding)

        # Filled rectangle behind the text, then white label text on top.
        cv2.rectangle(image,
                      (text_x, text_y - label_size[1] - padding),
                      (text_x + label_size[0] + padding, text_y),
                      box_color, -1)
        cv2.putText(image, label,
                    (text_x + padding // 2, text_y - padding // 2),
                    font, font_scale, (255, 255, 255), thickness)

    return image
190
 
191
def process_frame(self, frame: np.ndarray) -> tuple[np.ndarray, str]:
    """Run the safety analysis on a frame and visualize the findings.

    Returns (annotated image, analysis text). The analysis text is
    scanned for lines of the form '- <location>area:description</location>';
    each one becomes an observation drawn onto a copy of the frame.
    """
    if frame is None:
        return None, "No image provided"

    analysis = self.analyze_frame(frame)
    output = frame.copy()

    # Extract structured observations from the model's formatted response.
    observations = []
    for raw in analysis.split('\n'):
        raw = raw.strip()
        if not (raw.startswith('-') and '<location>' in raw and '</location>' in raw):
            continue

        inner_start = raw.find('<location>') + len('<location>')
        inner = raw[inner_start:raw.find('</location>')].strip()

        # Tag payload is 'area:description'; skip malformed entries.
        if ':' not in inner:
            continue
        area, description = inner.split(':', 1)
        observations.append({
            'location': area.strip(),
            'description': description.strip()
        })

    if observations:
        return self.draw_observations(output, observations), analysis
    return output, analysis
223
+
224
+ # Create interface
225
  monitor = SafetyMonitor()
226
 
227
  with gr.Blocks() as demo:
 
229
 
230
  with gr.Row():
231
  input_image = gr.Image(label="Upload Image")
232
+ output_image = gr.Image(label="Safety Analysis")
233
 
234
  analysis_text = gr.Textbox(label="Detailed Analysis", lines=5)
235
 
 
251
 
252
  gr.Markdown("""
253
  ## Instructions:
254
+ 1. Upload any workplace/safety-related image
255
+ 2. View identified hazards and safety concerns
256
+ 3. Check detailed analysis for recommendations
257
  """)
258
 
259
  return demo