Spaces:

CVPR
/

regionclip-demo

Runtime error

regionclip-demo / detectron2 /data /transforms /build.py

jwyang

first commit

4121bec over 2 years ago

3.29 kB

	# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
	# from . import transforms as T
	import torchvision.transforms as T
	from PIL import Image
	from timm.data import create_transform
	from .torchvision_transforms.transforms import Resize as New_Resize

	class _GaussianBlur:
	    """Blur a PIL image with a Gaussian of randomly drawn radius.

	    Used by ``build_clip_transforms`` for the optional
	    ``cfg.AUG.GAUSSIAN_BLUR`` step. Defined at module level (not nested in
	    the builder) so instances are reachable by qualified name.
	    """

	    def __init__(self, sigma=(0.1, 2.0)):
	        # (low, high) bounds of the uniformly sampled blur radius.
	        self.sigma = sigma

	    def __call__(self, img):
	        # Imported here so the helper is self-contained and the module's
	        # import block does not need to change.
	        import random

	        from PIL import ImageFilter

	        radius = random.uniform(self.sigma[0], self.sigma[1])
	        return img.filter(ImageFilter.GaussianBlur(radius=radius))


	def build_clip_transforms(cfg, is_train=True):
	    """Build the image transform pipeline described by ``cfg.AUG``.

	    Args:
	        cfg: Config node. Reads ``cfg.AUG.*`` on every path and, on the timm
	            path only, ``cfg.TRAIN.IMAGE_SIZE``, ``cfg.MODEL.PIXEL_MEAN`` and
	            ``cfg.MODEL.PIXEL_STD``.
	        is_train: When true, build the training augmentation pipeline;
	            otherwise build the resize + to-tensor pipeline.

	    Returns:
	        The result of timm's ``create_transform`` when ``cfg.AUG.USE_TIMM``
	        is set and ``is_train`` is true; otherwise a ``T.Compose``.

	    Raises:
	        ValueError: If ``is_train`` is true and ``cfg.AUG.TRAIN.IMAGE_SIZE``
	            has neither two entries nor one entry combined with a non-None
	            ``cfg.AUG.TRAIN.MAX_SIZE``.
	    """
	    aug = cfg.AUG

	    if aug.USE_TIMM and is_train:
	        print('=> use timm transform for training')
	        timm_cfg = aug.TIMM_AUG
	        # NOTE(review): this path reads cfg.TRAIN.IMAGE_SIZE while the other
	        # paths read cfg.AUG.TRAIN.IMAGE_SIZE -- confirm this is intended.
	        return create_transform(
	            input_size=cfg.TRAIN.IMAGE_SIZE[0],
	            is_training=True,
	            use_prefetcher=False,
	            no_aug=False,
	            re_prob=timm_cfg.RE_PROB,
	            re_mode=timm_cfg.RE_MODE,
	            re_count=timm_cfg.RE_COUNT,
	            scale=aug.SCALE,
	            ratio=aug.RATIO,
	            hflip=timm_cfg.HFLIP,
	            vflip=timm_cfg.VFLIP,
	            color_jitter=timm_cfg.COLOR_JITTER,
	            auto_augment=timm_cfg.AUTO_AUGMENT,
	            interpolation=timm_cfg.INTERPOLATION,
	            mean=cfg.MODEL.PIXEL_MEAN,
	            std=cfg.MODEL.PIXEL_STD,
	        )

	    # NOTE: no T.Normalize step is added on the paths below; per the original
	    # author's note, normalization is applied in rcnn.py to stay consistent
	    # with Detectron2.

	    if not is_train:
	        # for zeroshot inference of grounding evaluation
	        return T.Compose([
	            T.Resize(
	                aug.TEST.IMAGE_SIZE[0],
	                interpolation=aug.TEST.INTERPOLATION,
	            ),
	            T.ToTensor(),
	        ])

	    image_size = aug.TRAIN.IMAGE_SIZE
	    if len(image_size) == 2:
	        # Data Augmentation from MSR-CLIP
	        resize = T.RandomResizedCrop(
	            image_size[0],
	            scale=aug.SCALE,
	            ratio=aug.RATIO,
	            interpolation=aug.INTERPOLATION,
	        )
	    elif len(image_size) == 1 and aug.TRAIN.MAX_SIZE is not None:
	        # designed for pretraining fastrcnn
	        resize = New_Resize(
	            image_size[0],
	            max_size=aug.TRAIN.MAX_SIZE,
	            interpolation=aug.INTERPOLATION,
	        )
	    else:
	        # Fail here with a clear message instead of hitting an unbound
	        # local further down.
	        raise ValueError(
	            f"Unsupported cfg.AUG.TRAIN.IMAGE_SIZE={image_size!r}: expected "
	            "two entries, or one entry together with a non-None "
	            "cfg.AUG.TRAIN.MAX_SIZE"
	        )

	    ts = [resize, T.RandomHorizontalFlip()]

	    # COLOR_JITTER holds the ColorJitter arguments followed by the
	    # probability of applying the jitter as its last entry.
	    cj = aug.COLOR_JITTER
	    if cj[-1] > 0.0:
	        ts.append(T.RandomApply([T.ColorJitter(*cj[:-1])], p=cj[-1]))

	    gs = aug.GRAY_SCALE
	    if gs > 0.0:
	        ts.append(T.RandomGrayscale(gs))

	    gb = aug.GAUSSIAN_BLUR
	    if gb > 0.0:
	        ts.append(T.RandomApply([_GaussianBlur([.1, 2.])], p=gb))

	    ts.append(T.ToTensor())
	    return T.Compose(ts)