Spaces:

dennistrujillo
/

MedSAMTest

Sleeping

App Files Community

Change prompt from a bounding box to point and click

by vincentgao95 - opened Jul 29

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+27

-32

Files changed (1) hide show

app.py +27 -32

app.py CHANGED Viewed

@@ -30,25 +30,23 @@ def load_image(file_path):
     return img, H, W
 @torch.no_grad()
-def medsam_inference(medsam_model, img_embed, box_1024, H, W):
-    box_torch = torch.as_tensor(box_1024, dtype=torch.float, device=img_embed.device)
-    if len(box_torch.shape) == 2:
-        box_torch = box_torch[:, None, :] # (B, 1, 4)
-    box_torch=box_torch.reshape(1,4)
     sparse_embeddings, dense_embeddings = medsam_model.prompt_encoder(
-        points=None,
-        boxes=box_torch,
         masks=None,
     )
     low_res_logits, _ = medsam_model.mask_decoder(
-        image_embeddings=img_embed, # (B, 256, 64, 64)
-        image_pe=medsam_model.prompt_encoder.get_dense_pe(), # (1, 256, 64, 64)
-        sparse_prompt_embeddings=sparse_embeddings, # (B, 2, 256)
-        dense_prompt_embeddings=dense_embeddings, # (B, 256, 64, 64)
         multimask_output=False,
-        )
     low_res_pred = torch.sigmoid(low_res_logits)  # (1, 1, 256, 256)
@@ -58,15 +56,16 @@ def medsam_inference(medsam_model, img_embed, box_1024, H, W):
         mode="bilinear",
         align_corners=False,
     )  # (1, 1, gt.shape)
-    low_res_pred = low_res_pred.squeeze().cpu().numpy()  # (256, 256)
     medsam_seg = (low_res_pred > 0.5).astype(np.uint8)
     return medsam_seg
 # Function for visualizing images with masks
-def visualize(image, mask, box):
     fig, ax = plt.subplots(1, 2, figsize=(10, 5))
     ax[0].imshow(image, cmap='gray')
-    ax[0].add_patch(plt.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1], edgecolor="red", facecolor="none"))
     ax[1].imshow(image, cmap='gray')
     ax[1].imshow(mask, alpha=0.5, cmap="jet")
     plt.tight_layout()
@@ -78,19 +77,18 @@ def visualize(image, mask, box):
     buf.seek(0)
     pil_img = Image.open(buf)
-    return pil_img
 # Main function for Gradio app
 def process_images(img_dict):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     # Load and preprocess image
-    print(img_dict)
     img = img_dict['image']
-    points = img_dict['points'][0]  # Accessing the first (and possibly only) set of points
-    if len(points) >= 6:
-        x_min, y_min, x_max, y_max = points[0], points[1], points[3], points[4]
-    else:
-        raise ValueError("Insufficient data for bounding box coordinates.")
     image, H, W = img, img.shape[0], img.shape[1]
     if len(image.shape) == 2:
         image = np.repeat(image[:, :, None], 3, axis=-1)
@@ -106,20 +104,17 @@ def process_images(img_dict):
     medsam_model = medsam_model.to(device)
     medsam_model.eval()
-    # Generate image embedding
-    with torch.no_grad():
-        img_embed = medsam_model.image_encoder(image_tensor)
-    # Calculate resized box coordinates
-    scale_factors = np.array([1024 / W, 1024 / H, 1024 / W, 1024 / H])
-    box_1024 = np.array([x_min, y_min, x_max, y_max]) * scale_factors
     # Perform inference
-    mask = medsam_inference(medsam_model, img_embed, box_1024, H, W)
     # Visualization
-    visualization = visualize(image, mask, [x_min, y_min, x_max, y_max])
     return visualization
 # Set up Gradio interface
 iface = gr.Interface(
     fn=process_images,
@@ -130,7 +125,7 @@ iface = gr.Interface(
         gr.Image(type="pil", label="Processed Image")
     ],
     title="ROI Selection with MEDSAM",
-    description="Upload an image (including NRRD files) and select regions of interest for processing."
 )
 # Launch the interface

     return img, H, W
 @torch.no_grad()
+def medsam_inference(medsam_model, img_embed, points_1024, H, W):
+    points_torch = torch.as_tensor(points_1024, dtype=torch.float, device=img_embed.device)
+    points_torch = points_torch.reshape(1, -1, 2)  # (1, N, 2)
     sparse_embeddings, dense_embeddings = medsam_model.prompt_encoder(
+        points=points_torch,
+        boxes=None,
         masks=None,
     )
     low_res_logits, _ = medsam_model.mask_decoder(
+        image_embeddings=img_embed,  # (B, 256, 64, 64)
+        image_pe=medsam_model.prompt_encoder.get_dense_pe(),  # (1, 256, 64, 64)
+        sparse_prompt_embeddings=sparse_embeddings,  # (B, 2, 256)
+        dense_prompt_embeddings=dense_embeddings,  # (B, 256, 64, 64)
         multimask_output=False,
+    )
     low_res_pred = torch.sigmoid(low_res_logits)  # (1, 1, 256, 256)
         mode="bilinear",
         align_corners=False,
     )  # (1, 1, gt.shape)
+    low_res_pred = low_res_pred.squeeze().cpu().numpy()  # (H, W)
     medsam_seg = (low_res_pred > 0.5).astype(np.uint8)
     return medsam_seg
 # Function for visualizing images with masks
+def visualize(image, mask, points):
     fig, ax = plt.subplots(1, 2, figsize=(10, 5))
     ax[0].imshow(image, cmap='gray')
+    for point in points:
+        ax[0].plot(point[0], point[1], 'ro')  # Mark points on the image
     ax[1].imshow(image, cmap='gray')
     ax[1].imshow(mask, alpha=0.5, cmap="jet")
     plt.tight_layout()
     buf.seek(0)
     pil_img = Image.open(buf)
+    return pil_img
 # Main function for Gradio app
 def process_images(img_dict):
     device = 'cuda' if torch.cuda.is_available() else 'cpu'
     # Load and preprocess image
     img = img_dict['image']
+    points = img_dict['points']
+    if len(points) == 0:
+        raise ValueError("No points provided.")
     image, H, W = img, img.shape[0], img.shape[1]
     if len(image.shape) == 2:
         image = np.repeat(image[:, :, None], 3, axis=-1)
     medsam_model = medsam_model.to(device)
     medsam_model.eval()
+    # Calculate resized point coordinates
+    scale_factors = np.array([1024 / W, 1024 / H])
+    points_1024 = np.array(points) * scale_factors
     # Perform inference
+    mask = medsam_inference(medsam_model, img_embed, points_1024, H, W)
     # Visualization
+    visualization = visualize(image, mask, points)
     return visualization
 # Set up Gradio interface
 iface = gr.Interface(
     fn=process_images,
         gr.Image(type="pil", label="Processed Image")
     ],
     title="ROI Selection with MEDSAM",
+    description="Upload an image (including NRRD files) and select points of interest for processing."
 )
 # Launch the interface