Spaces:

martintomov
/

InsectSAM

Running on Zero

App Files Files Community

Martin Tomov committed on May 21, 2024

Commit

9f37f40

verified ·

1 Parent(s): b8429de

runtime broken fix

Browse files

Files changed (1) hide show

app.py +201 -0

app.py ADDED Viewed

	@@ -0,0 +1,201 @@

+import os
+os.system('pip install gradio==4.29.0')  # as gradio==4.29.0 doesn't work in requirements.txt
+import random
+from dataclasses import dataclass
+from typing import Any, List, Dict, Optional, Union, Tuple
+import cv2
+import torch
+import requests
+import numpy as np
+from PIL import Image
+import matplotlib.pyplot as plt
+from transformers import AutoModelForMaskGeneration, AutoProcessor, pipeline
+import gradio as gr
+import spaces
+@dataclass
+class BoundingBox:
+    xmin: int
+    ymin: int
+    xmax: int
+    ymax: int
+    @property
+    def xyxy(self) -> List[float]:
+        return [self.xmin, self.ymin, self.xmax, self.ymax]
+@dataclass
+class DetectionResult:
+    score: float
+    label: str
+    box: BoundingBox
+    mask: Optional[np.ndarray] = None
+    @classmethod
+    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
+        return cls(
+            score=detection_dict['score'],
+            label=detection_dict['label'],
+            box=BoundingBox(
+                xmin=detection_dict['box']['xmin'],
+                ymin=detection_dict['box']['ymin'],
+                xmax=detection_dict['box']['xmax'],
+                ymax=detection_dict['box']['ymax']
+            )
+        )
+def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult]) -> np.ndarray:
+    image_cv2 = np.array(image) if isinstance(image, Image.Image) else image
+    image_cv2 = cv2.cvtColor(image_cv2, cv2.COLOR_RGB2BGR)
+    for detection in detection_results:
+        label = detection.label
+        score = detection.score
+        box = detection.box
+        mask = detection.mask
+        color = np.random.randint(0, 256, size=3).tolist()
+        cv2.rectangle(image_cv2, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
+        cv2.putText(image_cv2, f'{label}: {score:.2f}', (box.xmin, box.ymin - 10),
+                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
+        if mask is not None:
+            mask_uint8 = (mask * 255).astype(np.uint8)
+            contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            cv2.drawContours(image_cv2, contours, -1, color, 2)
+    return cv2.cvtColor(image_cv2, cv2.COLOR_BGR2RGB)
+def plot_detections(image: Union[Image.Image, np.ndarray], detections: List[DetectionResult]) -> np.ndarray:
+    annotated_image = annotate(image, detections)
+    return annotated_image
+def load_image(image: Union[str, Image.Image]) -> Image.Image:
+    if isinstance(image, str) and image.startswith("http"):
+        image = Image.open(requests.get(image, stream=True).raw).convert("RGB")
+    elif isinstance(image, str):
+        image = Image.open(image).convert("RGB")
+    else:
+        image = image.convert("RGB")
+    return image
+def get_boxes(detection_results: List[DetectionResult]) -> List[List[List[float]]]:
+    boxes = []
+    for result in detection_results:
+        xyxy = result.box.xyxy
+        boxes.append(xyxy)
+    return [boxes]
+def mask_to_polygon(mask: np.ndarray) -> np.ndarray:
+    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    if len(contours) == 0:
+        return np.array([])
+    largest_contour = max(contours, key=cv2.contourArea)
+    return largest_contour
+def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
+    masks = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1).numpy().astype(np.uint8)
+    masks = (masks > 0).astype(np.uint8)
+    if polygon_refinement:
+        for idx, mask in enumerate(masks):
+            shape = mask.shape
+            polygon = mask_to_polygon(mask)
+            masks[idx] = cv2.fillPoly(np.zeros(shape, dtype=np.uint8), [polygon], 1)
+    return list(masks)
+@spaces.GPU
+def detect(image: Image.Image, labels: List[str], threshold: float = 0.3, detector_id: Optional[str] = None) -> List[Dict[str, Any]]:
+    detector_id = detector_id if detector_id else "IDEA-Research/grounding-dino-base"
+    object_detector = pipeline(model=detector_id, task="zero-shot-object-detection", device="cuda")
+    labels = [label if label.endswith(".") else label+"." for label in labels]
+    results = object_detector(image, candidate_labels=labels, threshold=threshold)
+    return [DetectionResult.from_dict(result) for result in results]
+@spaces.GPU
+def segment(image: Image.Image, detection_results: List[DetectionResult], polygon_refinement: bool = False, segmenter_id: Optional[str] = None) -> List[DetectionResult]:
+    segmenter_id = segmenter_id if segmenter_id else "martintmv/InsectSAM"
+    segmentator = AutoModelForMaskGeneration.from_pretrained(segmenter_id).to("cuda")
+    processor = AutoProcessor.from_pretrained(segmenter_id)
+    boxes = get_boxes(detection_results)
+    inputs = processor(images=image, input_boxes=boxes, return_tensors="pt").to("cuda")
+    outputs = segmentator(**inputs)
+    masks = processor.post_process_masks(masks=outputs.pred_masks, original_sizes=inputs.original_sizes, reshaped_input_sizes=inputs.reshaped_input_sizes)[0]
+    masks = refine_masks(masks, polygon_refinement)
+    for detection_result, mask in zip(detection_results, masks):
+        detection_result.mask = mask
+    return detection_results
+def grounded_segmentation(image: Union[Image.Image, str], labels: List[str], threshold: float = 0.3, polygon_refinement: bool = False, detector_id: Optional[str] = None, segmenter_id: Optional[str] = None) -> Tuple[np.ndarray, List[DetectionResult]]:
+    image = load_image(image)
+    detections = detect(image, labels, threshold, detector_id)
+    detections = segment(image, detections, polygon_refinement, segmenter_id)
+    return np.array(image), detections
+def mask_to_min_max(mask: np.ndarray) -> Tuple[int, int, int, int]:
+    y, x = np.where(mask)
+    return x.min(), y.min(), x.max(), y.max()
+def extract_and_paste_insect(original_image: np.ndarray, detection: DetectionResult, background: np.ndarray) -> None:
+    mask = detection.mask
+    xmin, ymin, xmax, ymax = mask_to_min_max(mask)
+    insect_crop = original_image[ymin:ymax, xmin:xmax]
+    mask_crop = mask[ymin:ymax, xmin:xmax]
+    # Ensure that we keep the original colors of the insect
+    insect = cv2.bitwise_and(insect_crop, insect_crop, mask=mask_crop)
+    x_offset, y_offset = xmin, ymin
+    x_end, y_end = x_offset + insect.shape[1], y_offset + insect.shape[0]
+    # Place the insect onto the yellow background
+    background[y_offset:y_end, x_offset:x_end] = insect
+def create_yellow_background_with_insects(image: np.ndarray, detections: List[DetectionResult]) -> np.ndarray:
+    yellow_background = np.full((image.shape[0], image.shape[1], 3), (0, 255, 255), dtype=np.uint8)
+    for detection in detections:
+        if detection.mask is not None:
+            extract_and_paste_insect(image, detection, yellow_background)
+    return yellow_background
+def draw_classification_boxes(image_with_insects, detections):
+    for detection in detections:
+        label = detection.label
+        score = detection.score
+        box = detection.box
+        color = (0, 255, 255)  # Yellow color for bounding box
+        cv2.rectangle(image_with_insects, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
+        (text_width, text_height), baseline = cv2.getTextSize(f"{label}: {score:.2f}", cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)
+        cv2.rectangle(
+            image_with_insects,
+            (box.xmin, box.ymin - text_height - baseline),
+            (box.xmin + text_width, box.ymin),
+            color,
+            thickness=cv2.FILLED
+        )
+        cv2.putText(
+            image_with_insects,
+            f"{label}: {score:.2f}",
+            (box.xmin, box.ymin - baseline),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            (255, 255, 255),
+            2
+        )
+    return image_with_insects
+def process_image(image):
+    labels = ["insect"]
+    original_image, detections = grounded_segmentation(image, labels, threshold=0.3, polygon_refinement=True)
+    annotated_image = plot_detections(original_image, detections)
+    yellow_background_with_insects = create_yellow_background_with_insects(np.array(original_image), detections)
+    yellow_background_with_boxes = draw_classification_boxes(yellow_background_with_insects.copy(), detections)
+    return annotated_image, yellow_background_with_boxes
+gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil"),
+    outputs=[gr.Image(type="numpy"), gr.Image(type="numpy")],
+    title="🐞 InsectSAM + GroundingDINO Inference",
+).launch()