Spaces:
Running
Running
import cv2 | |
import numpy as np | |
import torch | |
def read_image(path, grayscale=False): | |
"""Read an image from path as RGB or grayscale""" | |
mode = cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR | |
image = cv2.imread(str(path), mode) | |
if image is None: | |
raise IOError(f"Could not read image at {path}.") | |
if not grayscale: | |
image = image[..., ::-1] | |
return image | |
def numpy_image_to_torch(image): | |
"""Normalize the image tensor and reorder the dimensions.""" | |
if image.ndim == 3: | |
image = image.transpose((2, 0, 1)) # HxWxC to CxHxW | |
elif image.ndim == 2: | |
image = image[None] # add channel axis | |
else: | |
raise ValueError(f"Not an image: {image.shape}") | |
return torch.tensor(image / 255.0, dtype=torch.float) | |
def rotate_intrinsics(K, image_shape, rot): | |
"""image_shape is the shape of the image after rotation""" | |
assert rot <= 3 | |
h, w = image_shape[:2][:: -1 if (rot % 2) else 1] | |
fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2] | |
rot = rot % 4 | |
if rot == 1: | |
return np.array( | |
[[fy, 0.0, cy], [0.0, fx, w - cx], [0.0, 0.0, 1.0]], dtype=K.dtype | |
) | |
elif rot == 2: | |
return np.array( | |
[[fx, 0.0, w - cx], [0.0, fy, h - cy], [0.0, 0.0, 1.0]], | |
dtype=K.dtype, | |
) | |
else: # if rot == 3: | |
return np.array( | |
[[fy, 0.0, h - cy], [0.0, fx, cx], [0.0, 0.0, 1.0]], dtype=K.dtype | |
) | |
def rotate_pose_inplane(i_T_w, rot): | |
rotation_matrices = [ | |
np.array( | |
[ | |
[np.cos(r), -np.sin(r), 0.0, 0.0], | |
[np.sin(r), np.cos(r), 0.0, 0.0], | |
[0.0, 0.0, 1.0, 0.0], | |
[0.0, 0.0, 0.0, 1.0], | |
], | |
dtype=np.float32, | |
) | |
for r in [np.deg2rad(d) for d in (0, 270, 180, 90)] | |
] | |
return np.dot(rotation_matrices[rot], i_T_w) | |
def scale_intrinsics(K, scales): | |
"""Scale intrinsics after resizing the corresponding image.""" | |
scales = np.diag(np.concatenate([scales, [1.0]])) | |
return np.dot(scales.astype(K.dtype, copy=False), K) | |
def get_divisible_wh(w, h, df=None): | |
if df is not None: | |
w_new, h_new = map(lambda x: int(x // df * df), [w, h]) | |
else: | |
w_new, h_new = w, h | |
return w_new, h_new | |
def resize(image, size, fn=None, interp="linear", df=None): | |
"""Resize an image to a fixed size, or according to max or min edge.""" | |
h, w = image.shape[:2] | |
if isinstance(size, int): | |
scale = size / fn(h, w) | |
h_new, w_new = int(round(h * scale)), int(round(w * scale)) | |
w_new, h_new = get_divisible_wh(w_new, h_new, df) | |
scale = (w_new / w, h_new / h) | |
elif isinstance(size, (tuple, list)): | |
h_new, w_new = size | |
scale = (w_new / w, h_new / h) | |
else: | |
raise ValueError(f"Incorrect new size: {size}") | |
mode = { | |
"linear": cv2.INTER_LINEAR, | |
"cubic": cv2.INTER_CUBIC, | |
"nearest": cv2.INTER_NEAREST, | |
"area": cv2.INTER_AREA, | |
}[interp] | |
return cv2.resize(image, (w_new, h_new), interpolation=mode), scale | |
def crop(image, size, random=True, other=None, K=None, return_bbox=False): | |
"""Random or deterministic crop of an image, adjust depth and intrinsics.""" | |
h, w = image.shape[:2] | |
h_new, w_new = (size, size) if isinstance(size, int) else size | |
top = np.random.randint(0, h - h_new + 1) if random else 0 | |
left = np.random.randint(0, w - w_new + 1) if random else 0 | |
image = image[top : top + h_new, left : left + w_new] | |
ret = [image] | |
if other is not None: | |
ret += [other[top : top + h_new, left : left + w_new]] | |
if K is not None: | |
K[0, 2] -= left | |
K[1, 2] -= top | |
ret += [K] | |
if return_bbox: | |
ret += [(top, top + h_new, left, left + w_new)] | |
return ret | |
def zero_pad(size, *images): | |
"""zero pad images to size x size""" | |
ret = [] | |
for image in images: | |
if image is None: | |
ret.append(None) | |
continue | |
h, w = image.shape[:2] | |
padded = np.zeros((size, size) + image.shape[2:], dtype=image.dtype) | |
padded[:h, :w] = image | |
ret.append(padded) | |
return ret | |