"""Gradio demo: segment an object with SAM from dots sketched on an image."""

from typing import List

import cv2
import gradio as gr
import numpy as np
import requests
import torch
from PIL import Image, ImageDraw
from transformers import SamModel, SamProcessor

MODEL_ID = "facebook/sam-vit-base"
BINARY_THRESHOLD = 127  # sketch pixels brighter than this count as "drawn"
CLOSE_KERNEL_SIZE = 5  # side of the square kernel used to close gaps in a stroke
DOT_RADIUS = 10  # radius, in pixels, of the red markers drawn on the preview

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model and processor
model = SamModel.from_pretrained(MODEL_ID).to(device)
processor = SamProcessor.from_pretrained(MODEL_ID)

# Cached image embedding for the currently loaded image; None means "not
# computed yet". It is cleared by reset_embedding().
embedding = None


def mask_2_dots(mask: np.ndarray) -> List[List[List[int]]]:
    """Turn a sketch mask into one prompt point per drawn stroke.

    The mask is binarised, small gaps are closed morphologically, and the
    centroid of every resulting contour is taken as a point.

    Args:
        mask: The sketch layer, either a 3-channel array (converted with
            ``COLOR_RGB2GRAY``) or an already single-channel 2-D array.

    Returns:
        ``[points]`` where ``points`` is a list of ``[x, y]`` integer pairs,
        i.e. the point list wrapped in one extra outer list. ``points`` is
        empty when nothing was drawn.
    """
    gray = mask if mask.ndim == 2 else cv2.cvtColor(mask, cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, BINARY_THRESHOLD, 255, cv2.THRESH_BINARY)
    kernel = np.ones((CLOSE_KERNEL_SIZE, CLOSE_KERNEL_SIZE), np.uint8)
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    contours, _ = cv2.findContours(closed, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    points = []
    for contour in contours:
        moments = cv2.moments(contour)
        area = moments["m00"]
        if area:
            cx = int(moments["m10"] / area)
            cy = int(moments["m01"] / area)
        else:
            # Degenerate contour (single pixel or a thin line) has zero area,
            # so the moment-based centroid would divide by zero. Use the mean
            # of the contour's vertices instead.
            cx, cy = (int(v) for v in contour.reshape(-1, 2).mean(axis=0))
        points.append([cx, cy])
    return [points]


@torch.no_grad()
def foward_pass(image_input: np.ndarray, points: List[List[List[int]]]) -> np.ndarray:
    """Run SAM on ``image_input`` prompted with ``points``.

    The image embedding is computed once and stored in the module-level
    ``embedding``; later calls reuse it until ``reset_embedding`` clears it.

    Args:
        image_input: The image as an array accepted by ``Image.fromarray``.
        points: Prompt points in the nesting produced by ``mask_2_dots``.

    Returns:
        The post-processed masks with the mask index moved to the last axis
        (the result of ``transpose(1, 2, 0)`` on the squeezed output).
    """
    global embedding
    pil_image = Image.fromarray(image_input)
    inputs = processor(pil_image, input_points=points, return_tensors="pt").to(device)

    # The pixel values are only needed to build the embedding; the model is
    # then called with the embedding instead of the raw pixels.
    pixel_values = inputs.pop("pixel_values")
    if embedding is None:
        embedding = model.get_image_embeddings(pixel_values)

    outputs = model(image_embeddings=embedding, **inputs)
    masks = processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )
    return masks[0].squeeze(0).numpy().transpose(1, 2, 0)


def main_func(inputs) -> List[Image.Image]:
    """Segment the sketched object and build the gallery output.

    Args:
        inputs: Mapping with an ``'image'`` array and a ``'mask'`` array, as
            read from the sketch-enabled image component.

    Returns:
        The input image with a red dot on every prompt point, followed by one
        8-bit image per predicted mask.

    Raises:
        gr.Error: If no image was provided or no dots were drawn.
    """
    if not inputs or inputs.get("image") is None:
        raise gr.Error("Please upload an image first.")

    image_array = inputs["image"]
    points = mask_2_dots(inputs["mask"])
    if not points[0]:
        # Without at least one point there is nothing to prompt the model with.
        raise gr.Error("Please draw at least one dot on the object to segment.")

    masks = foward_pass(image_array, points)

    preview = Image.fromarray(image_array)
    draw = ImageDraw.Draw(preview)
    for x, y in points[0]:
        draw.ellipse(
            (x - DOT_RADIUS, y - DOT_RADIUS, x + DOT_RADIUS, y + DOT_RADIUS),
            fill="red",
        )

    pred_masks = [preview]
    pred_masks.extend(
        Image.fromarray((masks[:, :, i] * 255).astype(np.uint8))
        for i in range(masks.shape[2])
    )
    return pred_masks


def reset_embedding():
    """Drop the cached image embedding so the next call recomputes it."""
    global embedding
    embedding = None


with gr.Blocks() as demo:
    gr.Markdown("# How to use")
    gr.Markdown("To start, input an image, then use the brush to create dots on the object which you want to segment, don't worry if your dots aren't perfect as the code will find the middle of each drawn item. Then press the segment button to create masks for the object that the dots are on.")
    gr.Markdown("# Demo to run Segment Anything base model")
    gr.Markdown("""This app uses the [Segment Anything](https://huggingface.co/facebook/sam-vit-base) model from Meta to get a mask from a points in an image. """)
    # NOTE(review): the tab label below looks like a leftover from a template;
    # confirm whether it should describe segmentation instead.
    with gr.Tab("Flip Image"):
        with gr.Row():
            image_input = gr.Image(tool='sketch')
            image_output = gr.Gallery()
        image_button = gr.Button("Segment Image")

    image_button.click(main_func, inputs=image_input, outputs=image_output)
    # A newly uploaded image invalidates the cached embedding.
    image_input.upload(reset_embedding)

demo.launch()