|
import json |
|
import cv2 |
|
import numpy as np |
|
import os |
|
from torch.utils.data import Dataset |
|
from PIL import Image |
|
import cv2 |
|
from .data_utils import * |
|
from .base import BaseDataset |
|
from pycocotools import mask as mask_utils |
|
|
|
class UVOValDataset(BaseDataset):
    """Dataset yielding reference/target frame pairs for one annotated instance.

    Each sample picks a random instance annotation, selects two frames of the
    annotated video (an earlier "reference" frame and a later "target" frame),
    decodes the instance mask on both frames and hands the pair to the
    inherited ``process_pairs`` pipeline.

    Indexing is random by design: the ``idx`` passed to ``__getitem__`` is
    ignored and ``__len__`` is a fixed nominal length, not the number of
    annotations.
    """

    def __init__(self, image_dir, video_json, image_json):
        """Load the annotation files and set the sampling configuration.

        Args:
            image_dir: Root directory that frame names are joined onto.
            video_json: Path to a JSON file whose ``'annotations'`` entry is
                the list of per-instance annotations. Each annotation is read
                for its ``'video_id'`` and ``'segmentations'`` fields.
            image_json: Path to a JSON file mapping ``str(video_id)`` to the
                list of frame names of that video.
        """
        with open(video_json, 'r') as video_file:
            annotation_data = json.load(video_file)
        with open(image_json, 'r') as image_file:
            video_dict = json.load(image_file)

        self.image_root = image_dir
        self.data = annotation_data['annotations']
        self.video_dict = video_dict
        # The three settings below are not read inside this class; presumably
        # they are consumed by the inherited BaseDataset helpers -- confirm.
        self.size = (512, 512)
        self.clip_size = (224, 224)
        self.dynamic = 1

    def __len__(self):
        """Return the nominal epoch length (fixed, independent of the data)."""
        return 8000

    def __getitem__(self, idx):
        """Return a randomly drawn sample, retrying until one loads.

        Args:
            idx: Ignored; the annotation index is drawn at random on every
                attempt.

        Returns:
            Whatever ``get_sample`` returns for the first annotation that
            loads without raising.

        Raises:
            ValueError: If ``self.data`` is empty (nothing can be drawn).
        """
        num_annotations = len(self.data)
        while True:
            # ``high`` is exclusive in np.random.randint, so passing the full
            # length makes every annotation (including the last) reachable.
            sample_idx = np.random.randint(0, num_annotations)
            try:
                return self.get_sample(sample_idx)
            except Exception:
                # Best-effort loading: skip samples that fail (unreadable
                # frame, undecodable mask, ...) and draw another one.
                # KeyboardInterrupt / SystemExit are deliberately not caught
                # so the loop can still be interrupted.
                continue

    def check_region_size(self, image, yyxx, ratio, mode='max'):
        """Check a box's extent against a fraction of the image size.

        Args:
            image: Array whose first two dimensions are height and width.
            yyxx: Box given as ``(y1, y2, x1, x2)``.
            ratio: Fraction of the image height/width used as the threshold.
            mode: ``'max'`` fails only when the box exceeds the threshold in
                BOTH height and width; ``'min'`` fails only when it is below
                the threshold in BOTH. Any other value always passes.

        Returns:
            ``True`` if the box passes the check, ``False`` otherwise.
        """
        limit_h = image.shape[0] * ratio
        limit_w = image.shape[1] * ratio
        y1, y2, x1, x2 = yyxx
        box_h, box_w = y2 - y1, x2 - x1

        if mode == 'max':
            return not (box_h > limit_h and box_w > limit_w)
        if mode == 'min':
            return not (box_h < limit_h and box_w < limit_w)
        return True

    @staticmethod
    def _read_rgb(path):
        """Read the image at ``path`` with OpenCV and return it in RGB order."""
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise OSError(f"cv2.imread could not read image: {path}")
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def get_sample(self, idx):
        """Build one training item from the annotation at position ``idx``.

        A reference frame and a later target frame are drawn so that they are
        at least ``len(frames) // 5`` frames apart; the instance masks of both
        frames are decoded with pycocotools and the images/masks are passed to
        the inherited ``process_pairs``.

        Args:
            idx: Index into ``self.data``.

        Returns:
            The item produced by ``process_pairs`` with an added
            ``'time_steps'`` entry from the inherited ``sample_timestep``.

        Raises:
            OSError: If either frame cannot be read from disk.
        """
        ins_anno = self.data[idx]
        video_id = str(ins_anno['video_id'])

        frames = self.video_dict[video_id]
        masks = ins_anno['segmentations']
        num_frames = len(frames)

        # Enforce a minimum temporal gap of one fifth of the video between the
        # reference frame and the target frame.
        min_interval = num_frames // 5
        start_frame_index = np.random.randint(low=0, high=num_frames - min_interval)
        end_frame_index = start_frame_index + np.random.randint(
            min_interval, num_frames - start_frame_index
        )
        end_frame_index = min(end_frame_index, num_frames - 1)

        ref_image_path = os.path.join(self.image_root, frames[start_frame_index])
        tar_image_path = os.path.join(self.image_root, frames[end_frame_index])

        ref_image = self._read_rgb(ref_image_path)
        tar_image = self._read_rgb(tar_image_path)

        ref_mask = mask_utils.decode(masks[start_frame_index])
        tar_mask = mask_utils.decode(masks[end_frame_index])

        item_with_collage = self.process_pairs(ref_image, ref_mask, tar_image, tar_mask)
        item_with_collage['time_steps'] = self.sample_timestep()
        return item_with_collage
|
|
|
|