Martin Tomov committed on
Commit
1aa4620
·
verified ·
1 Parent(s): 493acd9

cv2 experiment

Browse files
Files changed (1) hide show
  1. app.py +18 -95
app.py CHANGED
@@ -46,115 +46,38 @@ class DetectionResult:
46
  )
47
  )
48
 
49
- def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
50
- image_cv2 = np.array(image) if isinstance(image, Image.Image) else image
51
- image_cv2 = cv2.cvtColor(image_cv2, cv2.COLOR_RGB2BGR)
52
-
53
- for detection in detection_results:
54
- label = detection.label
55
- score = detection.score
56
- box = detection.box
57
- mask = detection.mask
58
- color = np.random.randint(0, 256, size=3).tolist()
59
-
60
- cv2.rectangle(image_cv2, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
61
- cv2.putText(image_cv2, f'{label}: {score:.2f}', (box.xmin, box.ymin - 10),
62
- cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
63
-
64
- if mask is not None:
65
- mask_uint8 = (mask * 255).astype(np.uint8)
66
- contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
67
- cv2.drawContours(image_cv2, contours, -1, color, 2)
68
-
69
- return cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
70
-
71
- def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
72
- annotated_image = annotate(image, detections)
73
- return annotated_image
74
-
75
- def load_image(image: Union[str, Image.Image]) -> Image.Image:
76
- if isinstance(image, str) and image.startswith("http"):
77
- image = Image.open(requests.get(image, stream=True).raw).convert("RGB")
78
- elif isinstance(image, str):
79
- image = Image.open(image).convert("RGB")
80
- else:
81
- image = image.convert("RGB")
82
- return image
83
-
84
- def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
85
- boxes = []
86
- for result in detection_results:
87
- xyxy = result.box.xyxy
88
- boxes.append(xyxy)
89
- return [boxes]
90
-
91
- def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
92
- contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
93
- if len(contours) == 0:
94
- return np.array([])
95
- largest_contour = max(contours, key=cv2.contourArea)
96
- return largest_contour
97
-
98
- def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
99
- masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
100
- masks = (masks > 0).astype(np.uint8)
101
- if polygon_refinement:
102
- for idx, mask in enumerate(masks):
103
- shape = mask.shape
104
- polygon = mask_to_polygon(mask)
105
- masks[idx] = cv2.fillPoly(np.zeros(shape, dtype=np.uint8), [polygon], 1)
106
- return list(masks)
107
-
108
- @spaces.GPU
109
- def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[Dict[str, Any]]:
110
- detector_id = detector_id if detector_id else "IDEA-Research/grounding-dino-base"
111
- object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device="cuda")
112
- labels = [label if label.endswith(".") else label+"." for label in labels]
113
- results = object_detector(image, candidate_labels=labels, threshold=threshold)
114
- return [DetectionResult.from_dict(result) for result in results]
115
-
116
- @spaces.GPU
117
- def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
118
- segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"
119
- segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to("cuda")
120
- processor = AutoProcessor.from_pretrained(segmenter_id)
121
- boxes = get_boxes(detection_results)
122
- inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to("cuda")
123
- outputs = segmentator(**inputs)
124
- masks = processor.post_process_masks(masks=outputs.pred_masks, original_sizes=inputs.original_sizes, reshaped_input_sizes=inputs.reshaped_input_sizes)[0]
125
- masks = refine_masks(masks, polygon_refinement)
126
- for detection_result, mask in zip(detection_results, masks):
127
- detection_result.mask = mask
128
- return detection_results
129
-
130
- def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
131
- image = load_image(image)
132
- detections = detect(image, labels, threshold, detector_id)
133
- detections = segment(image, detections, polygon_refinement, segmenter_id)
134
- return np.array(image), detections
135
-
136
- def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
137
  y, x = np.where(mask)
138
- return x.min(), y.min(), x.max(), y.max()
 
 
139
 
140
- def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
141
  mask = detection.mask
142
  xmin, ymin, xmax, ymax = mask_to_min_max(mask)
143
  insect_crop = original_image[ymin:ymax, xmin:xmax]
144
  mask_crop = mask[ymin:ymax, xmin:xmax]
145
 
146
  insect = cv2.bitwise_and(insect_crop, insect_crop, mask=mask_crop)
147
-
148
- x_offset, y_offset = xmin, ymin
149
  x_end, y_end = x_offset + insect.shape[1], y_offset + insect.shape[0]
150
 
151
- background[y_offset:y_end, x_offset:x_end] = insect
 
 
 
 
 
 
 
 
152
 
153
- def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
154
- yellow_background = np.full((image.shape[0], image.shape[1], 3), (0, 255, 255), dtype=np.uint8)
155
  for detection in detections:
156
  if detection.mask is not None:
157
  extract_and_paste_insect(image, detection, yellow_background)
 
158
  return yellow_background
159
 
160
  def run_length_encoding(mask):
 
46
  )
47
  )
48
 
49
+ def mask_to_min_max(mask):
50
+ """Convert mask to min and max coordinates of the bounding box."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  y, x = np.where(mask)
52
+ xmin, xmax = x.min(), x.max()
53
+ ymin, ymax = y.min(), y.max()
54
+ return xmin, ymin, xmax, ymax
55
 
56
+ def extract_and_paste_insect(original_image, detection, background):
57
  mask = detection.mask
58
  xmin, ymin, xmax, ymax = mask_to_min_max(mask)
59
  insect_crop = original_image[ymin:ymax, xmin:xmax]
60
  mask_crop = mask[ymin:ymax, xmin:xmax]
61
 
62
  insect = cv2.bitwise_and(insect_crop, insect_crop, mask=mask_crop)
63
+ x_offset, y_offset = detection.box.xmin, detection.box.ymin
 
64
  x_end, y_end = x_offset + insect.shape[1], y_offset + insect.shape[0]
65
 
66
+ inverse_mask = cv2.bitwise_not(mask_crop)
67
+ bg_region = background[y_offset:y_end, x_offset:x_end]
68
+ bg_ready = cv2.bitwise_and(bg_region, bg_region, mask=inverse_mask)
69
+ combined = cv2.add(insect, bg_ready)
70
+ background[y_offset:y_end, x_offset:x_end] = combined
71
+
72
+ def create_yellow_background_with_insects(image, detections):
73
+ # Create a plain yellow background
74
+ yellow_background = np.full_like(image, (0, 255, 255), dtype=np.uint8)
75
 
76
+ # Extract and paste each insect on the background
 
77
  for detection in detections:
78
  if detection.mask is not None:
79
  extract_and_paste_insect(image, detection, yellow_background)
80
+
81
  return yellow_background
82
 
83
  def run_length_encoding(mask):