English
detection
open-world
open-set
Inference Endpoints
File size: 1,174 Bytes
d846043
3d3cb53
d846043
22bf258
3d3cb53
1e32e1a
3d3cb53
873b855
a740a6e
873b855
 
 
 
22bf258
96e0ac2
d846043
 
46c271a
d846043
c56d19f
b435ec9
d846043
 
 
 
 
 
 
 
 
0172050
 
 
d846043
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39

import os
from typing import Dict, List, Any
import groundingdino
from groundingdino.util.inference import load_model, load_image, predict, annotate
import subprocess

# Working directory of the process at import time (the original author noted
# this as /app in their deployment).
HOME = os.getcwd()

# Directory of the installed groundingdino package (the original author noted
# this as /opt/conda/lib/python3.9/site-packages/groundingdino in their
# deployment).
PACKAGE_HOME = os.path.dirname(groundingdino.__file__)

# Config file located in the "config" directory of the installed package.
CONFIG_PATH = os.path.join(PACKAGE_HOME, "config", "GroundingDINO_SwinT_OGC.py")
# Relative path to the weights file.
# NOTE(review): not referenced by the visible code -- EndpointHandler.__init__
# builds its own weights path from its `path` argument. Confirm whether this
# constant is still needed.
WEIGHTS_PATH = os.path.join("model", "weights", "groundingdino_swint_ogc.pth")

class EndpointHandler:
    """Callable request handler that loads a GroundingDINO model at start-up.

    NOTE: ``__call__`` currently only echoes the request's image and prompt
    back to the caller; the model loaded in ``__init__`` is not invoked yet.
    """

    def __init__(self, path):
        """Preload everything needed at inference time.

        Args:
            path: Directory containing ``weights/groundingdino_swint_ogc.pth``.
        """
        weights_file = os.path.join(path, "weights", "groundingdino_swint_ogc.pth")
        self.model = load_model(CONFIG_PATH, weights_file)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one request.

        Args:
            data: Request payload. Must contain an ``"inputs"`` mapping that
                itself contains the keys ``"image"`` and ``"prompt"``.

        Returns:
            A one-element list holding a dict with the received ``"image"``
            and ``"prompt"`` values; the result is serialized and returned
            to the client.

        Raises:
            KeyError: If ``"inputs"``, ``"image"`` or ``"prompt"`` is missing.
        """
        # Read the values without popping them: the request payload belongs to
        # the caller and must stay intact (e.g. for logging or a retry).
        inputs = data["inputs"]

        return [{
            "image": inputs["image"],
            "prompt": inputs["prompt"],
        }]