Aliayub1995
/

VideoLLaMA2-7B

@@ -16,67 +16,28 @@ class EndpointHandler:
         self.model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
         self.model, self.processor, self.tokenizer = model_init(self.model_path)
-    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-        """
-        Handle inference requests.
-        Args:
-            data (Dict[str, Any]): The input data for inference. Expected keys:
-                                   - 'modal' (str): 'video' or 'image'
-                                   - 'modal_path' (str): Path to the video or image file
-                                   - 'instruct' (str): The instruction/query to process
-        Returns:
-            List[Dict[str, Any]]: The output of the inference.
-        """
-        modal = data.get("modal", "video")
-        modal_path = data.get("modal_path", "")
-        instruct = data.get("instruct", "")
-        if not modal_path or not instruct:
-            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")
-        # Perform inference
-        output = mm_infer(
-            self.processor[modal](modal_path),
-            instruct,
-            model=self.model,
-            tokenizer=self.tokenizer,
-            do_sample=False,
-            modal=modal
-        )
-        return [{"output": output}]
-# from transformers import pipeline
-# class EndpointHandler:
-#     def __init__(self, path: str = ""):
-#         """
-#         Initialize the handler by setting up the environment and loading the model.
-#         """
-#         # Use a pipeline as a high-level helper to download and load the model
-#         self.pipe = pipeline("visual-question-answering", model="DAMO-NLP-SG/VideoLLaMA2-8x7B")
-#         print("Model downloaded and pipeline created successfully.")
-#     def __call__(self, data):
-#         """
-#         Handle inference requests.
-#         Args:
-#             data (dict): Input data containing 'image' and 'question'.
-#         Returns:
-#             dict: The output from the model.
-#         """
-#         image = data.get("image")
-#         question = data.get("question")
-#         if not image or not question:
-#             raise ValueError("Both 'image' and 'question' must be provided in the input data.")
-#         # Use the pipeline to perform visual question answering
-#         output = self.pipe(image=image, question=question)
-#         return output

         self.model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
         self.model, self.processor, self.tokenizer = model_init(self.model_path)
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Handle an inference request.
+
+        This must be defined inside the EndpointHandler class body: Python looks
+        up __call__ on the type, so a module-level function with this name does
+        not make handler instances callable.
+
+        Args:
+            data (Dict[str, Any]): The request payload. Keys read:
+                - 'modal' (str): Selects the processor, e.g. 'video' or 'image';
+                                 defaults to 'video' when absent.
+                - 'modal_path' (str): Path to the video or image file.
+                - 'instruct' (str): The instruction/query to process.
+
+        Returns:
+            List[Dict[str, Any]]: A one-element list, [{"output": <result of mm_infer>}].
+
+        Raises:
+            ValueError: If 'modal_path' or 'instruct' is missing or empty.
+        """
+        print(f"Received data: {data}")  # Debugging: Print received data
+        modal = data.get("modal", "video")
+        modal_path = data.get("modal_path", "")
+        instruct = data.get("instruct", "")
+        print(f"Modal: {modal}, Modal Path: {modal_path}, Instruct: {instruct}")  # Debugging: Print extracted values
+        # Reject incomplete requests before doing any (expensive) preprocessing.
+        if not modal_path or not instruct:
+            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")
+        # Perform inference: preprocess the file with the processor registered
+        # for this modality, then run the model on it with the instruction.
+        output = mm_infer(
+            self.processor[modal](modal_path),
+            instruct,
+            model=self.model,
+            tokenizer=self.tokenizer,
+            do_sample=False,
+            modal=modal
+        )
+        return [{"output": output}]