# Copyright (2024) Bytedance Ltd. and/or its affiliates
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# copy and modify from: https://github.com/OpenGVLab/Ask-Anything/blob/main/video_chat2/conversation.py
from PIL import Image
import torch
from transformers import StoppingCriteria, StoppingCriteriaList
from dataset.custom_data_parsers.utils import put_pred_to_data_dict, get_prompt_from_data_dict
from dataset.tarsier_datamodule import TarsierDataProcessor
from dataset.utils import *

from enum import auto, Enum
import os
import re

# Sample record in the message-based data format: one user turn holding a
# video reference plus a text prompt, followed by one assistant turn holding
# the caption text. How it is consumed is not visible from this part of the
# file -- presumably a skeleton for building model inputs; verify against
# the code that reads it.
data_dict_tmp = {
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "type": "video",
                    "video": {
                        "video_file": "/mnt/hdfs/vlm/videos/movies_aligned_0523/tt8266310/tt8266310_1.50.24-1.50.29.mp4",
                    },
                },
                {
                    "type": "text",
                    "text": "Describe the video in detail.",
                },
            ],
        },
        {
            "role": "assistant",
            "content": [
                {
                    "type": "text",
                    # Adjacent literals are concatenated by the parser into
                    # one caption string; each piece ends with its own space.
                    "text": (
                        "A man in the driver's seat, wearing a black jacket with a maroon shirt, "
                        "fastens his seatbelt while smiling at the man in the passenger seat, "
                        "who is adjusting his position. The passenger, also wearing a black "
                        "jacket with a maroon shirt, turns to look forward and smiles. The "
                        "driver then leans forward to start the car and leans back in his "
                        "seat. In the background, a beige car is visible through the window."
                    ),
                },
            ],
        },
    ],
    "dataset": "video_caption",
    "task": "video/caption",
    "idx": 0,
}

# NOTE(review): the image placeholder is an empty string as written here;
# confirm this is intended and not a placeholder tag lost in transit.
IMAGE_TOKEN = ""

# NOTE(review): the VIDEO_TOKEN assignment is cut off at this point in the
# source (the string literal is never closed), so it is kept as a comment
# instead of guessing the value. Restore the full assignment from upstream.
# VIDEO_TOKEN = "