kelvinou01
/

GroundingDINO

Inference Endpoints

Model card Files and versions Community

kelvinou01 committed on Mar 11

Commit

841a649

•

1 Parent(s): b435ec9

Update handler

Files changed (1) hide show

handler.py +29 -7

handler.py CHANGED Viewed

@@ -1,9 +1,12 @@
 import os
 from typing import Dict, List, Any
 import groundingdino
 from groundingdino.util.inference import load_model, load_image, predict, annotate
-import subprocess
 # /app
 HOME = os.getcwd()
@@ -20,6 +23,9 @@ class EndpointHandler():
         self.model = load_model(CONFIG_PATH, os.path.join(path, "weights", "groundingdino_swint_ogc.pth"))
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
        data args:
@@ -29,10 +35,26 @@ class EndpointHandler():
             A :obj:`list` | `dict`: will be serialized and returned
         """
         inputs = data.pop("inputs")
-        image = inputs.pop("image")
         prompt = inputs.pop("prompt")
-        return [{
-            "image": image,
-            "prompt": prompt,
-        }]

+import base64
+from io import BytesIO
 import os
 from typing import Dict, List, Any
+import cv2
 import groundingdino
 from groundingdino.util.inference import load_model, load_image, predict, annotate
+import tempfile
 # /app
 HOME = os.getcwd()
         self.model = load_model(CONFIG_PATH, os.path.join(path, "weights", "groundingdino_swint_ogc.pth"))
+        self.box_threshold = 0.35
+        self.text_threshold = 0.25
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
        data args:
             A :obj:`list` | `dict`: will be serialized and returned
         """
         inputs = data.pop("inputs")
+        image_base64 = inputs.pop("image")
         prompt = inputs.pop("prompt")
+        image_data = base64.b64decode(image_base64)
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=True) as f:
+            f.write(image_data)
+            image_source, image = load_image(f.name)
+            boxes, logits, phrases = predict(
+                model=self.model,
+                image=image,
+                caption=prompt,
+                box_threshold=self.box_threshold,
+                text_threshold=self.text_threshold
+            )
+            annotated_frame = annotate(image_source=image_source, boxes=boxes, logits=logits, phrases=phrases)
+            _, annotated_image = cv2.imencode(".jpg", annotated_frame)
+            annotated_image_b64 = base64.b64encode(annotated_image).decode("utf-8")
+            return [{
+                "image": annotated_image_b64,
+                "prompt": prompt,
+            }]