SkalskiP committed
Commit 0691c7d
1 Parent(s): 2c71d17

make it return 0 or 1 mask

Files changed (1)
  1. app.py +26 -35
app.py CHANGED
@@ -1,4 +1,4 @@
-from typing import List
+from typing import Optional
 
 import gradio as gr
 import spaces
@@ -26,43 +26,34 @@ SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
 @spaces.GPU
 @torch.inference_mode()
 @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
-def process_image(
-    image_input, text_input
-) -> List[Image.Image]:
+def process_image(image_input, text_input) -> Optional[Image.Image]:
     if not image_input:
         gr.Info("Please upload an image.")
-        return []
+        return None
 
     if not text_input:
         gr.Info("Please enter a text prompt.")
-        return []
+        return None
 
-    texts = [prompt.strip() for prompt in text_input.split(",")]
-    detections_list = []
-    for text in texts:
-        _, result = run_florence_inference(
-            model=FLORENCE_MODEL,
-            processor=FLORENCE_PROCESSOR,
-            device=DEVICE,
-            image=image_input,
-            task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
-            text=text
-        )
-        detections = sv.Detections.from_lmm(
-            lmm=sv.LMM.FLORENCE_2,
-            result=result,
-            resolution_wh=image_input.size
-        )
-        detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
-        detections_list.append(detections)
-
-    detections = sv.Detections.merge(detections_list)
+    _, result = run_florence_inference(
+        model=FLORENCE_MODEL,
+        processor=FLORENCE_PROCESSOR,
+        device=DEVICE,
+        image=image_input,
+        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
+        text=text_input
+    )
+    detections = sv.Detections.from_lmm(
+        lmm=sv.LMM.FLORENCE_2,
+        result=result,
+        resolution_wh=image_input.size
+    )
+    detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
     detections = run_sam_inference(SAM_IMAGE_MODEL, image_input, detections)
-    return [
-        Image.fromarray(mask.astype("uint8") * 255)
-        for mask
-        in detections.mask
-    ]
+    if len(detections) == 0:
+        gr.Info("No objects detected.")
+        return None
+    return Image.fromarray(detections.mask[0].astype("uint8") * 255)
 
 
 with gr.Blocks() as demo:
@@ -72,11 +63,11 @@ with gr.Blocks() as demo:
                 type='pil', label='Upload image')
             text_input_component = gr.Textbox(
                 label='Text prompt',
-                placeholder='Enter comma separated text prompts')
+                placeholder='Enter text prompts')
            submit_button_component = gr.Button(
                 value='Submit', variant='primary')
         with gr.Column():
-            gallery_output_component = gr.Gallery(label='Output masks')
+            image_output_component = gr.Image(label='Output mask')
 
     submit_button_component.click(
         fn=process_image,
@@ -85,7 +76,7 @@ with gr.Blocks() as demo:
             text_input_component
         ],
         outputs=[
-            gallery_output_component,
+            image_output_component,
         ]
     )
     text_input_component.submit(
@@ -95,7 +86,7 @@ with gr.Blocks() as demo:
             text_input_component
         ],
        outputs=[
-            gallery_output_component,
+            image_output_component,
        ]
    )
 
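
The change that gives the commit its name is the return statement: instead of a gallery of masks (one per comma-separated prompt), process_image now returns at most one PIL image, built from the first entry of detections.mask, which supervision stores as a boolean array of shape (num_detections, H, W). A minimal, self-contained sketch of that bool-to-image conversion, with a hand-made 4x4 mask standing in for real SAM output:

import numpy as np
from PIL import Image

# Stand-in for one entry of detections.mask: a boolean array of
# shape (H, W). Real masks come from SAM; this one is invented.
mask = np.array([
    [False, True,  True,  False],
    [True,  True,  True,  True],
    [True,  True,  True,  True],
    [False, True,  True,  False],
])

# astype("uint8") maps False/True to 0/1; multiplying by 255 yields a
# black-and-white grayscale image that gr.Image can display directly.
mask_image = Image.fromarray(mask.astype("uint8") * 255)
print(mask_image.mode, mask_image.size)  # -> L (4, 4)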
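
On the UI side, the gr.Gallery output is swapped for a single gr.Image, which accepts either a PIL image or None (returning None leaves the component empty, matching the early returns). A stripped-down sketch of the new wiring, with a trivial stand-in for the Florence-2 + SAM pipeline; the component names and the Optional return contract follow the diff, everything else is illustrative:

from typing import Optional

import gradio as gr
from PIL import Image


def process_image(image_input, text_input) -> Optional[Image.Image]:
    # Stand-in for the Florence-2 + SAM pipeline: returns a blank
    # "mask" sized like the input, or None, mirroring the commit's
    # zero-or-one-mask contract. gr.Image stays empty on None.
    if image_input is None or not text_input:
        return None
    return Image.new("L", image_input.size, 255)


with gr.Blocks() as demo:
    image_input_component = gr.Image(type='pil', label='Upload image')
    text_input_component = gr.Textbox(label='Text prompt')
    submit_button_component = gr.Button(value='Submit', variant='primary')
    image_output_component = gr.Image(label='Output mask')

    submit_button_component.click(
        fn=process_image,
        inputs=[image_input_component, text_input_component],
        outputs=[image_output_component]
    )

demo.launch()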