Vincentqyw
fix: roma
c74a070
# %BANNER_BEGIN%
# ---------------------------------------------------------------------
# %COPYRIGHT_BEGIN%
#
# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
#
# Unpublished Copyright (c) 2020
# Magic Leap, Inc., All Rights Reserved.
#
# NOTICE: All information contained herein is, and remains the property
# of COMPANY. The intellectual and technical concepts contained herein
# are proprietary to COMPANY and may be covered by U.S. and Foreign
# Patents, patents in process, and are protected by trade secret or
# copyright law. Dissemination of this information or reproduction of
# this material is strictly forbidden unless prior written permission is
# obtained from COMPANY. Access to the source code contained herein is
# hereby forbidden to anyone except current COMPANY employees, managers
# or contractors who have executed Confidentiality and Non-disclosure
# agreements explicitly covering such access.
#
# The copyright notice above does not evidence any actual or intended
# publication or disclosure of this source code, which includes
# information that is confidential and/or proprietary, and is a trade
# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
#
# %COPYRIGHT_END%
# ----------------------------------------------------------------------
# %AUTHORS_BEGIN%
#
# Originating Authors: Paul-Edouard Sarlin
# Daniel DeTone
# Tomasz Malisiewicz
#
# %AUTHORS_END%
# --------------------------------------------------------------------*/
# %BANNER_END%
from pathlib import Path
import time
from collections import OrderedDict
from threading import Thread
import numpy as np
import cv2
import torch
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
class AverageTimer:
"""Class to help manage printing simple timing of code execution."""
def __init__(self, smoothing=0.3, newline=False):
self.smoothing = smoothing
self.newline = newline
self.times = OrderedDict()
self.will_print = OrderedDict()
self.reset()
def reset(self):
now = time.time()
self.start = now
self.last_time = now
for name in self.will_print:
self.will_print[name] = False
def update(self, name="default"):
now = time.time()
dt = now - self.last_time
if name in self.times:
dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
self.times[name] = dt
self.will_print[name] = True
self.last_time = now
def print(self, text="Timer"):
total = 0.0
print("[{}]".format(text), end=" ")
for key in self.times:
val = self.times[key]
if self.will_print[key]:
print("%s=%.3f" % (key, val), end=" ")
total += val
print("total=%.3f sec {%.1f FPS}" % (total, 1.0 / total), end=" ")
if self.newline:
print(flush=True)
else:
print(end="\r", flush=True)
self.reset()
class VideoStreamer:
"""Class to help process image streams. Four types of possible inputs:"
1.) USB Webcam.
2.) An IP camera
3.) A directory of images (files in directory matching 'image_glob').
4.) A video file, such as an .mp4 or .avi file.
"""
def __init__(self, basedir, resize, skip, image_glob, max_length=1000000):
self._ip_grabbed = False
self._ip_running = False
self._ip_camera = False
self._ip_image = None
self._ip_index = 0
self.cap = []
self.camera = True
self.video_file = False
self.listing = []
self.resize = resize
self.interp = cv2.INTER_AREA
self.i = 0
self.skip = skip
self.max_length = max_length
if isinstance(basedir, int) or basedir.isdigit():
print("==> Processing USB webcam input: {}".format(basedir))
self.cap = cv2.VideoCapture(int(basedir))
self.listing = range(0, self.max_length)
elif basedir.startswith(("http", "rtsp")):
print("==> Processing IP camera input: {}".format(basedir))
self.cap = cv2.VideoCapture(basedir)
self.start_ip_camera_thread()
self._ip_camera = True
self.listing = range(0, self.max_length)
elif Path(basedir).is_dir():
print("==> Processing image directory input: {}".format(basedir))
self.listing = list(Path(basedir).glob(image_glob[0]))
for j in range(1, len(image_glob)):
image_path = list(Path(basedir).glob(image_glob[j]))
self.listing = self.listing + image_path
self.listing.sort()
self.listing = self.listing[:: self.skip]
self.max_length = np.min([self.max_length, len(self.listing)])
if self.max_length == 0:
raise IOError("No images found (maybe bad 'image_glob' ?)")
self.listing = self.listing[: self.max_length]
self.camera = False
elif Path(basedir).exists():
print("==> Processing video input: {}".format(basedir))
self.cap = cv2.VideoCapture(basedir)
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
self.listing = range(0, num_frames)
self.listing = self.listing[:: self.skip]
self.video_file = True
self.max_length = np.min([self.max_length, len(self.listing)])
self.listing = self.listing[: self.max_length]
else:
raise ValueError('VideoStreamer input "{}" not recognized.'.format(basedir))
if self.camera and not self.cap.isOpened():
raise IOError("Could not read camera")
def load_image(self, impath):
"""Read image as grayscale and resize to img_size.
Inputs
impath: Path to input image.
Returns
grayim: uint8 numpy array sized H x W.
"""
grayim = cv2.imread(impath, 0)
if grayim is None:
raise Exception("Error reading image %s" % impath)
w, h = grayim.shape[1], grayim.shape[0]
w_new, h_new = process_resize(w, h, self.resize)
grayim = cv2.resize(grayim, (w_new, h_new), interpolation=self.interp)
return grayim
def next_frame(self):
"""Return the next frame, and increment internal counter.
Returns
image: Next H x W image.
status: True or False depending whether image was loaded.
"""
if self.i == self.max_length:
return (None, False)
if self.camera:
if self._ip_camera:
# Wait for first image, making sure we haven't exited
while self._ip_grabbed is False and self._ip_exited is False:
time.sleep(0.001)
ret, image = self._ip_grabbed, self._ip_image.copy()
if ret is False:
self._ip_running = False
else:
ret, image = self.cap.read()
if ret is False:
print("VideoStreamer: Cannot get image from camera")
return (None, False)
w, h = image.shape[1], image.shape[0]
if self.video_file:
self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i])
w_new, h_new = process_resize(w, h, self.resize)
image = cv2.resize(image, (w_new, h_new), interpolation=self.interp)
image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
else:
image_file = str(self.listing[self.i])
image = self.load_image(image_file)
self.i = self.i + 1
return (image, True)
def start_ip_camera_thread(self):
self._ip_thread = Thread(target=self.update_ip_camera, args=())
self._ip_running = True
self._ip_thread.start()
self._ip_exited = False
return self
def update_ip_camera(self):
while self._ip_running:
ret, img = self.cap.read()
if ret is False:
self._ip_running = False
self._ip_exited = True
self._ip_grabbed = False
return
self._ip_image = img
self._ip_grabbed = ret
self._ip_index += 1
# print('IPCAMERA THREAD got frame {}'.format(self._ip_index))
def cleanup(self):
self._ip_running = False
# --- PREPROCESSING ---
def process_resize(w, h, resize):
assert len(resize) > 0 and len(resize) <= 2
if len(resize) == 1 and resize[0] > -1:
scale = resize[0] / max(h, w)
w_new, h_new = int(round(w * scale)), int(round(h * scale))
elif len(resize) == 1 and resize[0] == -1:
w_new, h_new = w, h
else: # len(resize) == 2:
w_new, h_new = resize[0], resize[1]
# Issue warning if resolution is too small or too large.
if max(w_new, h_new) < 160:
print("Warning: input resolution is very small, results may vary")
elif max(w_new, h_new) > 2000:
print("Warning: input resolution is very large, results may vary")
return w_new, h_new
def frame2tensor(frame, device):
return torch.from_numpy(frame / 255.0).float()[None, None].to(device)
def read_image(path, device, resize, rotation, resize_float):
image = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
if image is None:
return None, None, None
w, h = image.shape[1], image.shape[0]
w_new, h_new = process_resize(w, h, resize)
scales = (float(w) / float(w_new), float(h) / float(h_new))
if resize_float:
image = cv2.resize(image.astype("float32"), (w_new, h_new))
else:
image = cv2.resize(image, (w_new, h_new)).astype("float32")
if rotation != 0:
image = np.rot90(image, k=rotation)
if rotation % 2:
scales = scales[::-1]
inp = frame2tensor(image, device)
return image, inp, scales
# --- GEOMETRY ---
def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999):
if len(kpts0) < 5:
return None
f_mean = np.mean([K0[0, 0], K1[1, 1], K0[0, 0], K1[1, 1]])
norm_thresh = thresh / f_mean
kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
E, mask = cv2.findEssentialMat(
kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf, method=cv2.RANSAC
)
assert E is not None
best_num_inliers = 0
ret = None
for _E in np.split(E, len(E) / 3):
n, R, t, _ = cv2.recoverPose(_E, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
if n > best_num_inliers:
best_num_inliers = n
ret = (R, t[:, 0], mask.ravel() > 0)
return ret
def rotate_intrinsics(K, image_shape, rot):
"""image_shape is the shape of the image after rotation"""
assert rot <= 3
h, w = image_shape[:2][:: -1 if (rot % 2) else 1]
fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
rot = rot % 4
if rot == 1:
return np.array(
[[fy, 0.0, cy], [0.0, fx, w - 1 - cx], [0.0, 0.0, 1.0]], dtype=K.dtype
)
elif rot == 2:
return np.array(
[[fx, 0.0, w - 1 - cx], [0.0, fy, h - 1 - cy], [0.0, 0.0, 1.0]],
dtype=K.dtype,
)
else: # if rot == 3:
return np.array(
[[fy, 0.0, h - 1 - cy], [0.0, fx, cx], [0.0, 0.0, 1.0]], dtype=K.dtype
)
def rotate_pose_inplane(i_T_w, rot):
rotation_matrices = [
np.array(
[
[np.cos(r), -np.sin(r), 0.0, 0.0],
[np.sin(r), np.cos(r), 0.0, 0.0],
[0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 1.0],
],
dtype=np.float32,
)
for r in [np.deg2rad(d) for d in (0, 270, 180, 90)]
]
return np.dot(rotation_matrices[rot], i_T_w)
def scale_intrinsics(K, scales):
scales = np.diag([1.0 / scales[0], 1.0 / scales[1], 1.0])
return np.dot(scales, K)
def to_homogeneous(points):
return np.concatenate([points, np.ones_like(points[:, :1])], axis=-1)
def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1):
kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
kpts0 = to_homogeneous(kpts0)
kpts1 = to_homogeneous(kpts1)
t0, t1, t2 = T_0to1[:3, 3]
t_skew = np.array([[0, -t2, t1], [t2, 0, -t0], [-t1, t0, 0]])
E = t_skew @ T_0to1[:3, :3]
Ep0 = kpts0 @ E.T # N x 3
p1Ep0 = np.sum(kpts1 * Ep0, -1) # N
Etp1 = kpts1 @ E # N x 3
d = p1Ep0**2 * (
1.0 / (Ep0[:, 0] ** 2 + Ep0[:, 1] ** 2)
+ 1.0 / (Etp1[:, 0] ** 2 + Etp1[:, 1] ** 2)
)
return d
def angle_error_mat(R1, R2):
cos = (np.trace(np.dot(R1.T, R2)) - 1) / 2
cos = np.clip(cos, -1.0, 1.0) # numercial errors can make it out of bounds
return np.rad2deg(np.abs(np.arccos(cos)))
def angle_error_vec(v1, v2):
n = np.linalg.norm(v1) * np.linalg.norm(v2)
return np.rad2deg(np.arccos(np.clip(np.dot(v1, v2) / n, -1.0, 1.0)))
def compute_pose_error(T_0to1, R, t):
R_gt = T_0to1[:3, :3]
t_gt = T_0to1[:3, 3]
error_t = angle_error_vec(t, t_gt)
error_t = np.minimum(error_t, 180 - error_t) # ambiguity of E estimation
error_R = angle_error_mat(R, R_gt)
return error_t, error_R
def pose_auc(errors, thresholds):
sort_idx = np.argsort(errors)
errors = np.array(errors.copy())[sort_idx]
recall = (np.arange(len(errors)) + 1) / len(errors)
errors = np.r_[0.0, errors]
recall = np.r_[0.0, recall]
aucs = []
for t in thresholds:
last_index = np.searchsorted(errors, t)
r = np.r_[recall[:last_index], recall[last_index - 1]]
e = np.r_[errors[:last_index], t]
aucs.append(np.trapz(r, x=e) / t)
return aucs
# --- VISUALIZATION ---
def plot_image_pair(imgs, dpi=100, size=6, pad=0.5):
n = len(imgs)
assert n == 2, "number of images must be two"
figsize = (size * n, size * 3 / 4) if size is not None else None
_, ax = plt.subplots(1, n, figsize=figsize, dpi=dpi)
for i in range(n):
ax[i].imshow(imgs[i], cmap=plt.get_cmap("gray"), vmin=0, vmax=255)
ax[i].get_yaxis().set_ticks([])
ax[i].get_xaxis().set_ticks([])
for spine in ax[i].spines.values(): # remove frame
spine.set_visible(False)
plt.tight_layout(pad=pad)
def plot_keypoints(kpts0, kpts1, color="w", ps=2):
ax = plt.gcf().axes
ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4):
fig = plt.gcf()
ax = fig.axes
fig.canvas.draw()
transFigure = fig.transFigure.inverted()
fkpts0 = transFigure.transform(ax[0].transData.transform(kpts0))
fkpts1 = transFigure.transform(ax[1].transData.transform(kpts1))
fig.lines = [
matplotlib.lines.Line2D(
(fkpts0[i, 0], fkpts1[i, 0]),
(fkpts0[i, 1], fkpts1[i, 1]),
zorder=1,
transform=fig.transFigure,
c=color[i],
linewidth=lw,
)
for i in range(len(kpts0))
]
ax[0].scatter(kpts0[:, 0], kpts0[:, 1], c=color, s=ps)
ax[1].scatter(kpts1[:, 0], kpts1[:, 1], c=color, s=ps)
def make_matching_plot(
image0,
image1,
kpts0,
kpts1,
mkpts0,
mkpts1,
color,
text,
path,
show_keypoints=False,
fast_viz=False,
opencv_display=False,
opencv_title="matches",
small_text=[],
):
if fast_viz:
make_matching_plot_fast(
image0,
image1,
kpts0,
kpts1,
mkpts0,
mkpts1,
color,
text,
path,
show_keypoints,
10,
opencv_display,
opencv_title,
small_text,
)
return
plot_image_pair([image0, image1])
if show_keypoints:
plot_keypoints(kpts0, kpts1, color="k", ps=4)
plot_keypoints(kpts0, kpts1, color="w", ps=2)
plot_matches(mkpts0, mkpts1, color)
fig = plt.gcf()
txt_color = "k" if image0[:100, :150].mean() > 200 else "w"
fig.text(
0.01,
0.99,
"\n".join(text),
transform=fig.axes[0].transAxes,
fontsize=15,
va="top",
ha="left",
color=txt_color,
)
txt_color = "k" if image0[-100:, :150].mean() > 200 else "w"
fig.text(
0.01,
0.01,
"\n".join(small_text),
transform=fig.axes[0].transAxes,
fontsize=5,
va="bottom",
ha="left",
color=txt_color,
)
plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
plt.close()
def make_matching_plot_fast(
image0,
image1,
kpts0,
kpts1,
mkpts0,
mkpts1,
color,
text,
path=None,
show_keypoints=False,
margin=10,
opencv_display=False,
opencv_title="",
small_text=[],
):
H0, W0 = image0.shape
H1, W1 = image1.shape
H, W = max(H0, H1), W0 + W1 + margin
out = 255 * np.ones((H, W), np.uint8)
out[:H0, :W0] = image0
out[:H1, W0 + margin :] = image1
out = np.stack([out] * 3, -1)
if show_keypoints:
kpts0, kpts1 = np.round(kpts0).astype(int), np.round(kpts1).astype(int)
white = (255, 255, 255)
black = (0, 0, 0)
for x, y in kpts0:
cv2.circle(out, (x, y), 2, black, -1, lineType=cv2.LINE_AA)
cv2.circle(out, (x, y), 1, white, -1, lineType=cv2.LINE_AA)
for x, y in kpts1:
cv2.circle(out, (x + margin + W0, y), 2, black, -1, lineType=cv2.LINE_AA)
cv2.circle(out, (x + margin + W0, y), 1, white, -1, lineType=cv2.LINE_AA)
mkpts0, mkpts1 = np.round(mkpts0).astype(int), np.round(mkpts1).astype(int)
color = (np.array(color[:, :3]) * 255).astype(int)[:, ::-1]
for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, color):
c = c.tolist()
cv2.line(
out,
(x0, y0),
(x1 + margin + W0, y1),
color=c,
thickness=1,
lineType=cv2.LINE_AA,
)
# display line end-points as circles
cv2.circle(out, (x0, y0), 2, c, -1, lineType=cv2.LINE_AA)
cv2.circle(out, (x1 + margin + W0, y1), 2, c, -1, lineType=cv2.LINE_AA)
# Scale factor for consistent visualization across scales.
sc = min(H / 640.0, 2.0)
# Big text.
Ht = int(30 * sc) # text height
txt_color_fg = (255, 255, 255)
txt_color_bg = (0, 0, 0)
for i, t in enumerate(text):
cv2.putText(
out,
t,
(int(8 * sc), Ht * (i + 1)),
cv2.FONT_HERSHEY_DUPLEX,
1.0 * sc,
txt_color_bg,
2,
cv2.LINE_AA,
)
cv2.putText(
out,
t,
(int(8 * sc), Ht * (i + 1)),
cv2.FONT_HERSHEY_DUPLEX,
1.0 * sc,
txt_color_fg,
1,
cv2.LINE_AA,
)
# Small text.
Ht = int(18 * sc) # text height
for i, t in enumerate(reversed(small_text)):
cv2.putText(
out,
t,
(int(8 * sc), int(H - Ht * (i + 0.6))),
cv2.FONT_HERSHEY_DUPLEX,
0.5 * sc,
txt_color_bg,
2,
cv2.LINE_AA,
)
cv2.putText(
out,
t,
(int(8 * sc), int(H - Ht * (i + 0.6))),
cv2.FONT_HERSHEY_DUPLEX,
0.5 * sc,
txt_color_fg,
1,
cv2.LINE_AA,
)
if path is not None:
cv2.imwrite(str(path), out)
if opencv_display:
cv2.imshow(opencv_title, out)
cv2.waitKey(1)
return out
def error_colormap(x):
return np.clip(
np.stack([2 - x * 2, x * 2, np.zeros_like(x), np.ones_like(x)], -1), 0, 1
)