import os
from typing import Dict, List, Any

import groundingdino
from groundingdino.util.inference import load_model, load_image, predict, annotate

# Working directory of the serving process (noted as /app by the original author).
HOME = os.getcwd()
# Install location of the groundingdino package (noted by the original author as
# /opt/conda/lib/python3.9/site-packages/groundingdino).
PACKAGE_HOME = os.path.dirname(groundingdino.__file__)
# Model config is read from the installed package; weights from ./weights under HOME.
CONFIG_PATH = os.path.join(PACKAGE_HOME, "config", "GroundingDINO_SwinT_OGC.py")
WEIGHTS_PATH = os.path.join(HOME, "weights", "groundingdino_swint_ogc.pth")


class EndpointHandler:
    """Request handler that loads a GroundingDINO model once and serves calls.

    The model is loaded at construction time from ``CONFIG_PATH`` and
    ``WEIGHTS_PATH``. The current ``__call__`` does not run the model; it only
    echoes the ``image`` and ``prompt`` fields of the request back.
    """

    def __init__(self, path):
        """Load the model up front so it is ready before the first request.

        Args:
            path: Accepted for interface compatibility; not used here. The
                config and weights locations come from the module constants.
        """
        self.model = load_model(CONFIG_PATH, WEIGHTS_PATH)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract ``image`` and ``prompt`` from the request and return them.

        Args:
            data: Request payload. Must contain an ``"inputs"`` entry that is
                itself a mapping with ``"image"`` and ``"prompt"`` entries.
                NOTE: the entries are *popped*, so ``data`` and its
                ``"inputs"`` mapping are modified in place.

        Returns:
            A one-element list holding a dict with the ``"image"`` and
            ``"prompt"`` values exactly as received.

        Raises:
            KeyError: If ``"inputs"``, ``"image"`` or ``"prompt"`` is missing
                (assuming the payloads are plain dicts).
        """
        payload = data.pop("inputs")

        # Pull the required fields in a fixed order: image first, then prompt.
        echoed = {}
        for field in ("image", "prompt"):
            echoed[field] = payload.pop(field)

        return [echoed]