# Salesforce/instructblip-flan-t5-xl

+import base64
+import torch
+from transformers import InstructBlipForConditionalGeneration, InstructBlipTokenizer
class InstructBlipHandler:
    """Callable inference wrapper around an InstructBLIP model.

    A request is a mapping with two entries:

    * ``"image"`` -- an image file (PNG, JPEG, ...) encoded as base64, given
      as ``str`` or ``bytes``.  A leading ``data:...;base64,`` prefix and
      embedded whitespace are tolerated.
    * ``"text"`` -- the instruction / prompt for the model.

    Calling the handler runs ``preprocess`` -> ``model.generate`` ->
    ``postprocess`` and returns the decoded generations.

    Args:
        model: Object exposing ``generate(**inputs)``.
        tokenizer: Despite the historical parameter name, this must be the
            checkpoint's *processor*: it is called as
            ``tokenizer(images=..., text=..., return_tensors="pt")`` and must
            also provide ``batch_decode``.  A text-only tokenizer cannot
            produce the pixel values the model needs.
    """

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, input_data):
        """Run one request end to end and return the decoded outputs."""
        inputs = self.preprocess(input_data)
        outputs = self.model.generate(**inputs)
        return self.postprocess(outputs)

    def preprocess(self, input_data):
        """Build the keyword arguments for ``model.generate``.

        Args:
            input_data: Mapping with the ``"image"`` and ``"text"`` entries
                described on the class.

        Returns:
            Whatever the processor returns for the image/prompt pair; it is
            unpacked with ``**`` into ``model.generate``.

        Raises:
            KeyError: ``"image"`` or ``"text"`` is missing.
            TypeError: the image payload is neither ``str`` nor bytes-like.
            ValueError: the image payload is empty or not valid base64.
        """
        # Read both keys up front so a malformed request fails before any
        # decoding work is done.
        image_data = input_data["image"]
        text_prompt = input_data["text"]
        image = self._decode_image(image_data)
        # The processor is responsible for resizing/normalising the image and
        # for tokenising the prompt; passing its full output on (instead of
        # cherry-picking three keys) keeps any extra inputs it produces.
        return self.tokenizer(images=image, text=text_prompt, return_tensors="pt")

    def postprocess(self, outputs):
        """Decode generated token ids to text, dropping special tokens."""
        return self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

    @staticmethod
    def _decode_image(image_data):
        """Validate a base64 image payload and load it as an RGB image."""
        if isinstance(image_data, (bytes, bytearray)):
            image_data = bytes(image_data).decode("ascii")
        if not isinstance(image_data, str):
            raise TypeError(
                "image must be a base64 str or bytes, got "
                f"{type(image_data).__name__}"
            )

        # Clients frequently send a full data URI; keep only the payload.
        if image_data.startswith("data:"):
            image_data = image_data.partition(",")[2]
        # Line-wrapped base64 is legal, so whitespace is removed before the
        # strict decode below.
        image_data = "".join(image_data.split())

        try:
            raw = base64.b64decode(image_data, validate=True)
        except ValueError as err:  # binascii.Error is a ValueError subclass
            raise ValueError("image is not valid base64 data") from err
        if not raw:
            raise ValueError("image payload is empty")

        # Imported here so that this class adds no new file-level dependency;
        # transformers is already required by the file.
        from transformers.image_utils import load_image

        # ``load_image`` also accepts URLs and file paths.  Only a canonical
        # re-encoding of bytes that passed strict base64 validation is handed
        # over, so a request cannot smuggle in an ``http(s)://`` URL.
        return load_image(base64.b64encode(raw).decode("ascii"))
# InstructBLIP checkpoints are published with a combined processor (image
# pre-processing plus tokenisation), which is what has to be loaded here.
# NOTE(review): the transformers InstructBLIP documentation lists no
# ``InstructBlipTokenizer`` class -- confirm, then drop that name from the
# file-level import, which would otherwise fail before this point is reached.
from transformers import InstructBlipProcessor

# Single source of truth for the checkpoint both objects are loaded from.
_CHECKPOINT = "Salesforce/instructblip-flan-t5-xl"

model = InstructBlipForConditionalGeneration.from_pretrained(_CHECKPOINT)
# The module-level name ``tokenizer`` is kept so existing importers of this
# module keep working; it now refers to the processor.
tokenizer = InstructBlipProcessor.from_pretrained(_CHECKPOINT)
handler = InstructBlipHandler(model, tokenizer)