import os
from typing import Dict, List, Any

from groundingdino.util.inference import load_model, load_image, predict, annotate

HOME = os.getcwd()
CONFIG_PATH = os.path.join(HOME, "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py")
WEIGHTS_PATH = os.path.join(HOME, "weights", "groundingdino_swint_ogc.pth")


class EndpointHandler():
    """Custom inference handler that loads a GroundingDINO model once at startup.

    The model is built from ``CONFIG_PATH`` and ``WEIGHTS_PATH``, both of which
    are resolved relative to the process working directory at import time.
    """

    def __init__(self, path: str = "") -> None:
        """Preload everything needed at inference time.

        Args:
            path: Model directory supplied by the serving runtime. It is
                accepted so the handler can be constructed as
                ``EndpointHandler(model_dir)``; it is currently unused because
                the config and weights locations come from the module-level
                constants.
        """
        self.model = load_model(CONFIG_PATH, WEIGHTS_PATH)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Handle one inference request.

        Args:
            data: Request payload. ``data["inputs"]`` must be a mapping that
                contains the keys ``"image"`` and ``"prompt"``.

        Returns:
            A single-element list holding a dict with the received ``"image"``
            and ``"prompt"`` values; it will be serialized and returned.

        Raises:
            KeyError: If ``"inputs"``, ``"image"`` or ``"prompt"`` is missing.
        """
        # Read with indexing rather than ``pop`` so the caller's payload is
        # left untouched; a missing key still raises ``KeyError``.
        inputs = data["inputs"]
        image = inputs["image"]
        prompt = inputs["prompt"]

        # NOTE: the loaded model is not used here yet; the request fields are
        # echoed back unchanged.
        return [{
            "image": image,
            "prompt": prompt,
        }]