Spaces:

cyun9286
/

Align3R

Running on Zero

App Files Files Community

Align3R / dust3r /datasets /utils /cropping.py

cyun9286

Add application file

f53b39e 9 days ago

raw

history blame

7.6 kB

	# Copyright (C) 2024-present Naver Corporation. All rights reserved.
	# Licensed under CC BY-NC-SA 4.0 (non-commercial use only).
	#
	# --------------------------------------------------------
	# cropping utilities
	# --------------------------------------------------------
	import PIL.Image
	import os
	os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
	import cv2 # noqa
	import numpy as np # noqa
	from dust3r.utils.geometry import colmap_to_opencv_intrinsics, opencv_to_colmap_intrinsics # noqa
	# Pick the resampling filters from PIL.Image.Resampling when that attribute
	# exists; otherwise fall back to the constants exposed directly on PIL.Image.
	try:
	    lanczos, bicubic = PIL.Image.Resampling.LANCZOS, PIL.Image.Resampling.BICUBIC
	except AttributeError:
	    lanczos, bicubic = PIL.Image.LANCZOS, PIL.Image.BICUBIC


	class ImageList:
	    """Convenience class to apply the same operation to a whole set of images.

	    Accepts either a single image or a tuple/list/set of images. Every entry
	    that is not already a ``PIL.Image.Image`` is converted with
	    ``PIL.Image.fromarray``.
	    """

	    def __init__(self, images):
	        # A lone image is wrapped so the rest of the class only handles lists.
	        if not isinstance(images, (tuple, list, set)):
	            images = [images]
	        self.images = []
	        for image in images:
	            if not isinstance(image, PIL.Image.Image):
	                image = PIL.Image.fromarray(image)
	            self.images.append(image)

	    def __len__(self):
	        return len(self.images)

	    def to_pil(self):
	        """Return the single image, or a tuple of images when there are several."""
	        return tuple(self.images) if len(self.images) > 1 else self.images[0]

	    @property
	    def size(self):
	        """Return the ``size`` shared by all images (asserts that they agree)."""
	        sizes = [im.size for im in self.images]
	        assert all(sizes[0] == s for s in sizes)
	        return sizes[0]

	    def resize(self, *args, **kwargs):
	        """Call ``resize`` on every image and wrap the results in a new ImageList."""
	        return ImageList(self._dispatch('resize', *args, **kwargs))

	    def crop(self, *args, **kwargs):
	        """Call ``crop`` on every image and wrap the results in a new ImageList."""
	        return ImageList(self._dispatch('crop', *args, **kwargs))

	    def _dispatch(self, func, *args, **kwargs):
	        # Forward positional AND keyword arguments unchanged, so that calls such
	        # as ``resize(size, resample=...)`` reach the underlying image method.
	        return [getattr(im, func)(*args, **kwargs) for im in self.images]


	def rescale_image_depthmap(image, depthmap, pred_depth, camera_intrinsics, output_resolution, force=True):
	    """ Jointly rescale an image with its depthmap, predicted depth and intrinsics
	    so that (out_width, out_height) >= output_res.

	    A single scale factor is applied to both axes. When ``force`` is false and
	    that factor is >= 1, the inputs are returned without any resizing.
	    ``depthmap`` and ``pred_depth`` are optional (pass None to skip them).
	    """
	    views = ImageList(image)
	    size_wh = views.size
	    size_hw = size_wh[::-1]
	    input_resolution = np.array(size_wh)  # (W,H)
	    output_resolution = np.array(output_resolution)

	    # the same check applies to both maps (masks can be used in their place)
	    for aligned_map in (depthmap, pred_depth):
	        if aligned_map is not None:
	            assert tuple(aligned_map.shape[:2]) == size_hw

	    # define output resolution
	    assert output_resolution.shape == (2,)
	    scale_final = max(output_resolution / size_wh) + 1e-8
	    if not force and scale_final >= 1:
	        # nothing to shrink and upscaling was not requested
	        return (views.to_pil(), depthmap, pred_depth, camera_intrinsics)

	    scaled_size = np.floor(input_resolution * scale_final).astype(int)
	    output_resolution = list(scaled_size)

	    def _resize_nearest(array):
	        # nearest-neighbour interpolation: no blending between neighbouring values
	        return cv2.resize(array, output_resolution, fx=scale_final,
	                          fy=scale_final, interpolation=cv2.INTER_NEAREST)

	    # first rescale the image so that it contains the crop
	    resample_filter = lanczos if scale_final < 1 else bicubic
	    views = views.resize(output_resolution, resample=resample_filter)
	    if depthmap is not None:
	        depthmap = _resize_nearest(depthmap)
	    if pred_depth is not None:
	        pred_depth = _resize_nearest(pred_depth)

	    # no offset here; simple rescaling
	    camera_intrinsics = camera_matrix_of_crop(
	        camera_intrinsics, input_resolution, output_resolution, scaling=scale_final)

	    return views.to_pil(), depthmap, pred_depth, camera_intrinsics


	def camera_matrix_of_crop(input_camera_matrix, input_resolution, output_resolution, scaling=1, offset_factor=0.5, offset=None):
	    """Return the camera matrix after scaling the view and shifting its origin.

	    The margins are ``input_resolution * scaling - output_resolution`` and must
	    be non-negative. When ``offset`` is None it defaults to
	    ``offset_factor * margins``.
	    """
	    scaled_resolution = np.asarray(input_resolution) * scaling
	    margins = scaled_resolution - output_resolution
	    assert np.all(margins >= 0.0)
	    origin_shift = offset_factor * margins if offset is None else offset

	    # Work on the colmap-convention matrix: scale its first two rows, shift
	    # the first two entries of its last column, then convert back.
	    colmap_matrix = opencv_to_colmap_intrinsics(input_camera_matrix)
	    colmap_matrix[:2, :] *= scaling
	    colmap_matrix[:2, 2] -= origin_shift
	    return colmap_to_opencv_intrinsics(colmap_matrix)


	def crop_image_depthmap(image, depthmap, pred_depth, camera_intrinsics, crop_bbox):
	    """
	    Return a crop of the input view.

	    Parameters:
	    - image: a single image or a collection of images (wrapped in ImageList).
	    - depthmap: array indexed as [row, col, ...], or None to skip it.
	    - pred_depth: array indexed as [row, col, ...], or None to skip it.
	    - camera_intrinsics: camera matrix; it is copied, never modified in place.
	    - crop_bbox: (left, top, right, bottom) bounds of the crop.

	    Returns:
	    - (image, depthmap, pred_depth, camera_intrinsics) restricted to the crop,
	      with entries [0, 2] and [1, 2] of the intrinsics shifted by (left, top).
	    """
	    image = ImageList(image)
	    left, top, right, bottom = crop_bbox

	    image = image.crop((left, top, right, bottom))
	    # Both maps are optional, as in the other helpers of this module.
	    if depthmap is not None:
	        depthmap = depthmap[top:bottom, left:right]
	    if pred_depth is not None:
	        # Slicing only the first two axes also accepts a 2-D map.
	        pred_depth = pred_depth[top:bottom, left:right]

	    camera_intrinsics = camera_intrinsics.copy()
	    camera_intrinsics[0, 2] -= left
	    camera_intrinsics[1, 2] -= top

	    return image.to_pil(), depthmap, pred_depth, camera_intrinsics


	def bbox_from_intrinsics_in_out(input_camera_matrix, output_camera_matrix, output_resolution):
	    """Return the (left, top, right, bottom) crop box implied by two camera matrices.

	    The top-left corner is the rounded difference between the first two entries
	    of the last column of the input and output matrices; the box extends by
	    ``output_resolution`` (width, height) from that corner.
	    """
	    width, height = output_resolution
	    principal_shift = input_camera_matrix[:2, 2] - output_camera_matrix[:2, 2]
	    left, top = np.int32(np.round(principal_shift))
	    return (left, top, left + width, top + height)

	def center_crop_image_depthmap(image, depthmap, pred_depth, camera_intrinsics, crop_scale):
	    """
	    Jointly center-crop an image, its depthmap and its predicted depth, and
	    adjust the camera intrinsics accordingly.

	    Parameters:
	    - image: PIL.Image or similar, the input image.
	    - depthmap: np.ndarray or None, the corresponding depth map.
	    - pred_depth: np.ndarray or None, a second map aligned with the image.
	    - camera_intrinsics: np.ndarray, the 3x3 camera intrinsics matrix.
	    - crop_scale: float with 0 < crop_scale <= 1, the fraction of each image
	      side to keep.

	    Returns:
	    - cropped_image: PIL.Image, the center-cropped image.
	    - cropped_depthmap: np.ndarray or None, the center-cropped depth map.
	    - cropped_pred_depth: np.ndarray or None, the center-cropped pred_depth.
	    - adjusted_intrinsics: np.ndarray, the adjusted camera intrinsics matrix.
	    """
	    # Ensure crop_scale is valid
	    assert 0 < crop_scale <= 1, "crop_scale must be between 0 and 1"

	    # Convert image to ImageList for consistent processing
	    image = ImageList(image)
	    input_resolution = np.array(image.size)  # (width, height)
	    expected_hw = tuple(image.size[::-1])
	    if depthmap is not None:
	        assert tuple(depthmap.shape[:2]) == expected_hw, "Depthmap size must match image size"
	    if pred_depth is not None:
	        assert tuple(pred_depth.shape[:2]) == expected_hw, "pred_depth size must match image size"

	    # Compute output resolution after cropping
	    output_resolution = np.floor(input_resolution * crop_scale).astype(int)

	    # Split the removed margin evenly between the two sides (center crop);
	    # an odd margin leaves the extra pixel on the right/bottom side.
	    margins = input_resolution - output_resolution
	    left, top = (margins / 2).astype(int)
	    right = left + output_resolution[0]
	    bottom = top + output_resolution[1]

	    # Crop the image and the optional maps
	    image = image.crop((left, top, right, bottom))
	    if depthmap is not None:
	        depthmap = depthmap[top:bottom, left:right]
	    if pred_depth is not None:
	        # Slicing only the first two axes also accepts a 2-D map.
	        pred_depth = pred_depth[top:bottom, left:right]

	    # Cropping leaves the focal lengths unchanged; only the principal point
	    # moves with the new origin.
	    adjusted_intrinsics = camera_intrinsics.copy()
	    adjusted_intrinsics[0, 2] -= left  # cx
	    adjusted_intrinsics[1, 2] -= top  # cy

	    return image.to_pil(), depthmap, pred_depth, adjusted_intrinsics