jiuface committed on
Commit
bc3420d
1 Parent(s): 08430c8

add task prompt selection

app.py CHANGED
@@ -26,7 +26,7 @@ SAM_IMAGE_MODEL = load_sam_image_model(device=DEVICE)
 @spaces.GPU(duration=20)
 @torch.inference_mode()
 @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
-def process_image(image_input, text_input) -> Optional[Image.Image]:
+def process_image(image_input, task_prompt, text_input) -> Optional[Image.Image]:
     if not image_input:
         gr.Info("Please upload an image.")
         return None
@@ -34,14 +34,13 @@ def process_image(image_input, text_input) -> Optional[Image.Image]:
     if not text_input:
         gr.Info("Please enter a text prompt.")
         return None
-
     _, result = run_florence_inference(
         model=FLORENCE_MODEL,
         processor=FLORENCE_PROCESSOR,
         device=DEVICE,
         image=image_input,
-        task=FLORENCE_OPEN_VOCABULARY_DETECTION_TASK,
-        text=text_input
+        task=task_prompt,
+        text=text_input
     )
     detections = sv.Detections.from_lmm(
         lmm=sv.LMM.FLORENCE_2,
@@ -52,41 +51,43 @@ def process_image(image_input, text_input) -> Optional[Image.Image]:
     if len(detections) == 0:
         gr.Info("No objects detected.")
         return None
-    return Image.fromarray(detections.mask[0].astype("uint8") * 255)
+    images = []
+    print("mask generated:", len(detections.mask))
+    for i in range(len(detections.mask)):
+        img = Image.fromarray(detections.mask[i].astype(np.uint8) * 255)
+        images.append(img)
+    return images
 
 
 with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
-            image_input_component = gr.Image(
-                type='pil', label='Upload image')
-            text_input_component = gr.Textbox(
-                label='Text prompt',
-                placeholder='Enter text prompts')
-            submit_button_component = gr.Button(
-                value='Submit', variant='primary')
+            image = gr.Image(type='pil', label='Upload image')
+            image_url = gr.Textbox(label='Image url', placeholder='Enter image url (optional)')
+            task_prompt = gr.Dropdown(
+                [
+                    "<CAPTION>",
+                    "<DETAILED_CAPTION>",
+                    "<MORE_DETAILED_CAPTION>",
+                    "<CAPTION_TO_PHRASE_GROUNDING>",
+                    "<OPEN_VOCABULARY_DETECTION>",
+                    "<DENSE_REGION_CAPTION>"
+                ], value="<CAPTION_TO_PHRASE_GROUNDING>", label="Task Prompt", info="task prompts"
+            )
+            text_input_component = gr.Textbox(label='Text prompt', placeholder='Enter text prompts')
+            submit_button_component = gr.Button(value='Submit', variant='primary')
         with gr.Column():
-            image_output_component = gr.Image(label='Output mask')
+            image_output_component = gr.Gallery(label="Generated images")
 
     submit_button_component.click(
         fn=process_image,
         inputs=[
-            image_input_component,
+            image,
+            task_prompt,
             text_input_component
         ],
-        outputs=[
-            image_output_component,
-        ]
-    )
-    text_input_component.submit(
-        fn=process_image,
-        inputs=[
-            image_input_component,
-            text_input_component
-        ],
-        outputs=[
-            image_output_component,
-        ]
+        outputs=image_output_component
     )
+
 
 demo.launch(debug=False, show_error=True)
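
With this change, process_image takes the selected task prompt as a second argument and returns a list of PIL mask images for the Gallery component instead of a single image. A minimal way to smoke-test the new call order outside the UI is sketched below; the image path and prompt text are placeholder values, not part of the commit, and a CUDA device is assumed because the function keeps its @torch.autocast(device_type="cuda", ...) decorator.

from PIL import Image

# Hypothetical local check of the new (image, task, text) call order.
test_image = Image.open("example.jpg")            # placeholder path, not in the repo
masks = process_image(
    test_image,
    "<CAPTION_TO_PHRASE_GROUNDING>",              # one of the new dropdown choices
    "a dog"                                       # text prompt forwarded to Florence-2
)
if masks:                                         # None is returned on missing input or no detections
    for i, mask in enumerate(masks):
        mask.save(f"mask_{i}.png")                # each entry is a single-channel mask image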
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (125 Bytes).

utils/__pycache__/florence.cpython-310.pyc ADDED
Binary file (2.31 kB).

utils/__pycache__/sam.cpython-310.pyc ADDED
Binary file (1.39 kB).
utils/florence.py CHANGED
@@ -56,4 +56,5 @@ def run_florence_inference(
         generated_ids, skip_special_tokens=False)[0]
     response = processor.post_process_generation(
         generated_text, task=task, image_size=image.size)
+    print(generated_text, response)
     return generated_text, response
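
The added print is a debugging aid: it shows both the raw decoded string and the task-keyed dict that post_process_generation returns. For the default <CAPTION_TO_PHRASE_GROUNDING> task, the response dict is roughly shaped like the sketch below (coordinates and label are illustrative values only); sv.Detections.from_lmm then parses this structure into boxes and labels.

# Illustrative shape of `response` for the grounding task (values are made up):
response = {
    "<CAPTION_TO_PHRASE_GROUNDING>": {
        "bboxes": [[34.2, 16.0, 512.7, 410.5]],   # xyxy boxes in image pixels
        "labels": ["a dog"]                       # one label per box
    }
}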