File size: 2,857 Bytes
87ce8f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
from typing import Dict, List, Any
import sys
sys.path.append('./')
from videollama2 import model_init, mm_infer
from videollama2.utils import disable_torch_init
class EndpointHandler:
    """Request handler that answers instructions about a video or image.

    The model, the per-modality processors and the tokenizer are loaded once
    in ``__init__`` via ``model_init`` and reused for every call.
    """

    def __init__(self, path: str = ""):
        """
        Initialize the handler by loading the model and any other necessary components.

        Args:
            path (str): The path to the model or other necessary files.
                NOTE: this argument is accepted but not used; the checkpoint
                is always loaded from the hard-coded ``self.model_path``.
        """
        disable_torch_init()
        self.model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
        self.model, self.processor, self.tokenizer = model_init(self.model_path)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Handle inference requests.

        Args:
            data (Dict[str, Any]): The input data for inference. Expected keys:
                - 'modal' (str): 'video' or 'image' (defaults to 'video')
                - 'modal_path' (str): Path to the video or image file
                - 'instruct' (str): The instruction/query to process

        Returns:
            List[Dict[str, Any]]: A single-element list of the form
            ``[{"output": <result of mm_infer>}]``.

        Raises:
            ValueError: If 'modal_path' or 'instruct' is missing/empty, or if
                'modal' does not name a processor known to this handler.
        """
        modal = data.get("modal", "video")
        modal_path = data.get("modal_path", "")
        instruct = data.get("instruct", "")

        if not modal_path or not instruct:
            raise ValueError("Both 'modal_path' and 'instruct' must be provided in the input data.")

        # Look the processor up separately from calling it, so that only an
        # unknown modality is reported as bad input; a KeyError raised inside
        # the processor itself is left untouched.
        try:
            preprocess = self.processor[modal]
        except KeyError:
            raise ValueError(
                f"Unsupported 'modal' value {modal!r}; expected 'video' or 'image'."
            ) from None

        # Perform inference
        output = mm_infer(
            preprocess(modal_path),
            instruct,
            model=self.model,
            tokenizer=self.tokenizer,
            do_sample=False,
            modal=modal,
        )
        return [{"output": output}]
# --- Disabled alternative handler (transformers pipeline), kept for reference ---
# from transformers import pipeline
# class EndpointHandler:
#     def __init__(self, path: str = ""):
#         """
#         Initialize the handler by setting up the environment and loading the model.
#         """
#         # Use a pipeline as a high-level helper to download and load the model
#         self.pipe = pipeline("visual-question-answering", model="DAMO-NLP-SG/VideoLLaMA2-8x7B")
#         print("Model downloaded and pipeline created successfully.")
#     def __call__(self, data):
#         """
#         Handle inference requests.
#         Args:
#             data (dict): Input data containing 'image' and 'question'.
#         Returns:
#             dict: The output from the model.
#         """
#         image = data.get("image")
#         question = data.get("question")
#         if not image or not question:
#             raise ValueError("Both 'image' and 'question' must be provided in the input data.")
#         # Use the pipeline to perform visual question answering
#         output = self.pipe(image=image, question=question)
#         return output
|