from typing import Any, Dict, List

import torch
from transformers import CLIPModel, CLIPTokenizer


class EndpointHandler:
    """Inference handler that embeds text with a CLIP text encoder.

    The model and tokenizer are loaded once at construction time and reused
    for every call.
    """

    # Checkpoint the handler loads for both the model and the tokenizer.
    MODEL_ID = "openai/clip-vit-large-patch14"

    def __init__(self, path: str = ""):
        """Load the CLIP model and tokenizer.

        Args:
            path: Accepted for interface compatibility with callers that
                pass a model directory. It is currently unused; the
                checkpoint named by ``MODEL_ID`` is always loaded.
        """
        self.model = CLIPModel.from_pretrained(self.MODEL_ID)
        self.tokenizer = CLIPTokenizer.from_pretrained(self.MODEL_ID)

    def __call__(self, data: Dict[str, Any]) -> List[float]:
        """Return the CLIP text embedding for ``data["inputs"]``.

        Args:
            data: Request payload. ``data["inputs"]`` is the text to embed.
                If a list of strings is given, the whole batch is encoded
                but only the embedding of the first string is returned.

        Returns:
            The text feature vector of the first input as a flat list of
            floats.

        Raises:
            KeyError: If ``data`` has no ``"inputs"`` key.
        """
        # truncation=True clips over-long prompts to the tokenizer's maximum
        # length instead of letting the text encoder fail on them.
        token_inputs = self.tokenizer(
            data["inputs"],
            padding=True,
            truncation=True,
            return_tensors="pt",
        )

        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            text_features = self.model.get_text_features(**token_inputs)

        # Keep the historical contract: a single embedding (row 0) as a list.
        return text_features[0].cpu().tolist()


if __name__ == "__main__":
    eh = EndpointHandler()
    print(eh({"inputs": "a dog"}))