|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path |
|
import time |
|
from collections import OrderedDict |
|
from threading import Thread |
|
import numpy as np |
|
import cv2 |
|
import torch |
|
import matplotlib.pyplot as plt |
|
import matplotlib |
|
|
|
# Select the "Agg" matplotlib backend at import time, before any figure is made.
# NOTE(review): presumably chosen so plots can be rendered and saved without a
# display (e.g. on headless machines) - confirm against the callers' needs.
matplotlib.use("Agg")
|
|
|
|
|
class AverageTimer:
    """Class to help manage printing simple timing of code execution.

    Each call to ``update(name)`` records the wall-clock time elapsed since
    the previous ``update``/``reset`` under ``name``. Repeated measurements of
    the same name are blended with an exponential moving average.
    """

    def __init__(self, smoothing=0.3, newline=False):
        """Create a timer.

        Inputs
            smoothing: Weight given to the newest measurement when it is
                blended with the stored value of the same name.
            newline: If True, ``print`` ends its report with a newline;
                otherwise it ends with a carriage return so the next report
                overwrites it.
        """
        self.smoothing = smoothing
        self.newline = newline
        self.times = OrderedDict()
        self.will_print = OrderedDict()
        self.reset()

    def reset(self):
        """Restart the clock and mark every known entry as not-to-be-printed."""
        now = time.time()
        self.start = now
        self.last_time = now
        for name in self.will_print:
            self.will_print[name] = False

    def update(self, name="default"):
        """Record the time elapsed since the last update/reset under ``name``."""
        now = time.time()
        dt = now - self.last_time
        if name in self.times:
            # Exponential moving average with the previously stored value.
            dt = self.smoothing * dt + (1 - self.smoothing) * self.times[name]
        self.times[name] = dt
        self.will_print[name] = True
        self.last_time = now

    def print(self, text="Timer"):
        """Print every entry updated since the last reset, then reset.

        Inputs
            text: Label printed in square brackets at the start of the report.
        """
        total = 0.0
        print("[{}]".format(text), end=" ")
        for key, val in self.times.items():
            if self.will_print[key]:
                print("%s=%.3f" % (key, val), end=" ")
                total += val
        # Nothing timed yet (or a clock too coarse to see any elapsed time)
        # leaves total at 0; report an infinite rate instead of raising
        # ZeroDivisionError.
        fps = 1.0 / total if total > 0 else float("inf")
        print("total=%.3f sec {%.1f FPS}" % (total, fps), end=" ")
        if self.newline:
            print(flush=True)
        else:
            print(end="\r", flush=True)
        self.reset()
|
|
|
|
|
class VideoStreamer:
    """Class to help process image streams. Four types of possible inputs:
    1.) USB Webcam.
    2.) An IP camera
    3.) A directory of images (files in directory matching 'image_glob').
    4.) A video file, such as an .mp4 or .avi file.
    """

    def __init__(self, basedir, resize, skip, image_glob, max_length=1000000):
        """Open the input described by ``basedir``.

        Inputs
            basedir: An int / digit string (USB webcam index), a string
                starting with "http" or "rtsp" (IP camera), a directory path
                (image sequence) or a path to an existing file (video).
            resize: Resize specification forwarded to ``process_resize``.
            skip: Step used to subsample the image listing / video frames.
            image_glob: Sequence of glob patterns used for directory input.
            max_length: Maximum number of frames to serve.
        Raises
            IOError: No image matched, or the capture could not be opened.
            ValueError: ``basedir`` matches none of the supported inputs.
        """
        self._ip_grabbed = False
        self._ip_running = False
        # Initialised here so next_frame() can always read it, even if the
        # reader thread was never started.
        self._ip_exited = False
        self._ip_camera = False
        self._ip_image = None
        self._ip_index = 0
        self.cap = []
        self.camera = True
        self.video_file = False
        self.listing = []
        self.resize = resize
        self.interp = cv2.INTER_AREA
        self.i = 0
        self.skip = skip
        self.max_length = max_length
        if isinstance(basedir, int) or basedir.isdigit():
            print("==> Processing USB webcam input: {}".format(basedir))
            self.cap = cv2.VideoCapture(int(basedir))
            self.listing = range(0, self.max_length)
        elif basedir.startswith(("http", "rtsp")):
            print("==> Processing IP camera input: {}".format(basedir))
            self.cap = cv2.VideoCapture(basedir)
            self.start_ip_camera_thread()
            self._ip_camera = True
            self.listing = range(0, self.max_length)
        elif Path(basedir).is_dir():
            print("==> Processing image directory input: {}".format(basedir))
            self.listing = []
            for pattern in image_glob:
                self.listing.extend(Path(basedir).glob(pattern))
            self.listing.sort()
            self.listing = self.listing[:: self.skip]
            self.max_length = min(self.max_length, len(self.listing))
            if self.max_length == 0:
                raise IOError("No images found (maybe bad 'image_glob' ?)")
            self.listing = self.listing[: self.max_length]
            self.camera = False
        elif Path(basedir).exists():
            print("==> Processing video input: {}".format(basedir))
            self.cap = cv2.VideoCapture(basedir)
            self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
            num_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
            self.listing = range(0, num_frames)
            self.listing = self.listing[:: self.skip]
            self.video_file = True
            self.max_length = min(self.max_length, len(self.listing))
            self.listing = self.listing[: self.max_length]
        else:
            raise ValueError('VideoStreamer input "{}" not recognized.'.format(basedir))
        # ``camera`` stays True for every capture-backed input (webcam, IP
        # camera and video file), so this validates all of them.
        if self.camera and not self.cap.isOpened():
            raise IOError("Could not read camera")

    def load_image(self, impath):
        """Read image as grayscale and resize to img_size.
        Inputs
            impath: Path to input image.
        Returns
            grayim: uint8 numpy array sized H x W.
        Raises
            Exception: The image could not be read.
        """
        grayim = cv2.imread(impath, cv2.IMREAD_GRAYSCALE)
        if grayim is None:
            raise Exception("Error reading image %s" % impath)
        w, h = grayim.shape[1], grayim.shape[0]
        w_new, h_new = process_resize(w, h, self.resize)
        grayim = cv2.resize(grayim, (w_new, h_new), interpolation=self.interp)
        return grayim

    def next_frame(self):
        """Return the next frame, and increment internal counter.
        Returns
            image: Next H x W image.
            status: True or False depending whether image was loaded.
        """
        if self.i == self.max_length:
            return (None, False)
        if self.camera:
            if self._ip_camera:
                # Wait for the first image, unless the reader thread has
                # already given up.
                while self._ip_grabbed is False and self._ip_exited is False:
                    time.sleep(0.001)

                ret, image = self._ip_grabbed, self._ip_image
                if ret is False or image is None:
                    # The stream failed (possibly before any frame arrived,
                    # in which case there is no image to copy).
                    self._ip_running = False
                    ret = False
                else:
                    image = image.copy()
            else:
                ret, image = self.cap.read()
            if ret is False:
                print("VideoStreamer: Cannot get image from camera")
                return (None, False)
            w, h = image.shape[1], image.shape[0]
            if self.video_file:
                self.cap.set(cv2.CAP_PROP_POS_FRAMES, self.listing[self.i])

            w_new, h_new = process_resize(w, h, self.resize)
            image = cv2.resize(image, (w_new, h_new), interpolation=self.interp)
            # NOTE(review): OpenCV captures are normally BGR, so RGB2GRAY
            # weights the red/blue channels swapped. Kept unchanged because
            # switching the conversion would alter the produced images.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            image_file = str(self.listing[self.i])
            image = self.load_image(image_file)
        self.i = self.i + 1
        return (image, True)

    def start_ip_camera_thread(self):
        """Start the background thread that keeps reading the IP camera."""
        # Set the flags before starting the thread: the reader may fail
        # immediately and set ``_ip_exited``; resetting it afterwards would
        # hide that failure and make next_frame() wait forever.
        self._ip_running = True
        self._ip_exited = False
        self._ip_thread = Thread(target=self.update_ip_camera, args=())
        self._ip_thread.start()
        return self

    def update_ip_camera(self):
        """Thread body: keep the most recent IP camera frame in ``_ip_image``."""
        while self._ip_running:
            ret, img = self.cap.read()
            if ret is False:
                self._ip_running = False
                self._ip_exited = True
                self._ip_grabbed = False
                return

            self._ip_image = img
            self._ip_grabbed = ret
            self._ip_index += 1
        # Stopped through cleanup(): record it so that a later next_frame()
        # cannot wait on a reader that no longer runs.
        self._ip_exited = True

    def cleanup(self):
        """Ask the IP camera reader thread to stop."""
        self._ip_running = False
|
|
|
|
|
|
|
|
|
|
|
def process_resize(w, h, resize):
    """Compute the output size for an image of width ``w`` and height ``h``.

    Inputs
        w, h: Current image width and height.
        resize: Sequence of one or two numbers. A single value greater than
            -1 is the target length of the longer side (aspect ratio kept),
            a single -1 keeps the size, and two values are used directly as
            (width, height).
    Returns
        (w_new, h_new): The new width and height.
    """
    assert 0 < len(resize) <= 2

    single_value = len(resize) == 1
    if single_value and resize[0] > -1:
        factor = resize[0] / max(h, w)
        out_w = int(round(w * factor))
        out_h = int(round(h * factor))
    elif single_value and resize[0] == -1:
        out_w, out_h = w, h
    else:
        out_w, out_h = resize[0], resize[1]

    # Only warn; unusual resolutions are still returned as requested.
    longest_side = max(out_w, out_h)
    if longest_side < 160:
        print("Warning: input resolution is very small, results may vary")
    elif longest_side > 2000:
        print("Warning: input resolution is very large, results may vary")

    return out_w, out_h
|
|
|
|
|
def frame2tensor(frame, device):
    """Convert a numpy frame to a float tensor with two leading unit axes.

    The frame is divided by 255.0, converted to a float tensor, given two
    extra leading dimensions and moved to ``device``.
    """
    scaled = torch.from_numpy(frame / 255.0).float()
    return scaled[None, None].to(device)
|
|
|
|
|
def read_image(path, device, resize, rotation, resize_float):
    """Load a grayscale image, resize and optionally rotate it.

    Inputs
        path: Location of the image file.
        device: Device the returned tensor is moved to.
        resize: Resize specification forwarded to ``process_resize``.
        rotation: Number of 90-degree rotations passed to ``np.rot90``.
        resize_float: If True, convert to float32 before resizing; otherwise
            resize first and convert afterwards.
    Returns
        (image, inp, scales): the float32 numpy image, the tensor produced by
        ``frame2tensor`` and the (width, height) ratios original / resized
        (swapped for odd rotations). ``(None, None, None)`` if the file
        could not be read.
    """
    gray = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None, None, None

    h, w = gray.shape[0], gray.shape[1]
    w_new, h_new = process_resize(w, h, resize)
    target_size = (w_new, h_new)
    scales = (float(w) / float(w_new), float(h) / float(h_new))

    if resize_float:
        resized = cv2.resize(gray.astype("float32"), target_size)
    else:
        resized = cv2.resize(gray, target_size).astype("float32")

    if rotation != 0:
        resized = np.rot90(resized, k=rotation)
        # An odd number of quarter turns exchanges the two image axes.
        if rotation % 2:
            scales = scales[::-1]

    return resized, frame2tensor(resized, device), scales
|
|
|
|
|
|
|
|
|
|
|
def estimate_pose(kpts0, kpts1, K0, K1, thresh, conf=0.99999):
    """Estimate the relative pose between two views from matched keypoints.

    Inputs
        kpts0, kpts1: N x 2 arrays of matched pixel coordinates.
        K0, K1: 3 x 3 intrinsics of the two cameras.
        thresh: RANSAC inlier threshold in pixels; it is divided by the mean
            focal length because the keypoints are normalized first.
        conf: RANSAC confidence passed to ``cv2.findEssentialMat``.
    Returns
        (R, t, inliers) for the essential-matrix candidate with the most
        inliers reported by ``cv2.recoverPose``, or None when fewer than 5
        matches are given, no essential matrix is found, or no candidate has
        any inlier.
    """
    if len(kpts0) < 5:
        return None

    # Mean over the focal lengths of BOTH cameras (fx and fy of each).
    f_mean = np.mean([K0[0, 0], K0[1, 1], K1[0, 0], K1[1, 1]])
    norm_thresh = thresh / f_mean

    # Normalize: subtract the principal point and divide by the focal length.
    kpts0 = (kpts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
    kpts1 = (kpts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]

    E, mask = cv2.findEssentialMat(
        kpts0, kpts1, np.eye(3), threshold=norm_thresh, prob=conf, method=cv2.RANSAC
    )

    # Treat "no essential matrix" like the too-few-matches case rather than
    # crashing on an assert (which would also vanish under ``python -O``).
    if E is None:
        return None

    best_num_inliers = 0
    ret = None
    # E may stack several 3 x 3 candidates vertically; try each one.
    for _E in np.split(E, len(E) // 3):
        # NOTE(review): 1e9 is passed as recoverPose's distance threshold,
        # presumably to effectively disable it - confirm.
        n, R, t, _ = cv2.recoverPose(_E, kpts0, kpts1, np.eye(3), 1e9, mask=mask)
        if n > best_num_inliers:
            best_num_inliers = n
            ret = (R, t[:, 0], mask.ravel() > 0)
    return ret
|
|
|
|
|
def rotate_intrinsics(K, image_shape, rot):
    """Return the intrinsics of an image rotated by ``rot`` quarter turns.

    Inputs
        K: 3 x 3 intrinsics of the unrotated image.
        image_shape: Shape of the image after rotation.
        rot: Number of quarter turns (must be <= 3; taken modulo 4).
    Returns
        A new 3 x 3 intrinsics array with the dtype of ``K``. For ``rot``
        equal to 0 (modulo 4) this is an unchanged copy of ``K``.
    """
    assert rot <= 3
    # image_shape is post-rotation; swap it back for odd rotations so that
    # h, w are the dimensions of the unrotated image.
    h, w = image_shape[:2][:: -1 if (rot % 2) else 1]
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    rot = rot % 4
    if rot == 0:
        # No rotation: previously this fell through to the rot == 3 case.
        return K.copy()
    if rot == 1:
        return np.array(
            [[fy, 0.0, cy], [0.0, fx, w - 1 - cx], [0.0, 0.0, 1.0]], dtype=K.dtype
        )
    if rot == 2:
        return np.array(
            [[fx, 0.0, w - 1 - cx], [0.0, fy, h - 1 - cy], [0.0, 0.0, 1.0]],
            dtype=K.dtype,
        )
    # rot == 3
    return np.array(
        [[fy, 0.0, h - 1 - cy], [0.0, fx, cx], [0.0, 0.0, 1.0]], dtype=K.dtype
    )
|
|
|
|
|
def rotate_pose_inplane(i_T_w, rot):
    """Left-multiply a 4 x 4 pose by a rotation about the z axis.

    ``rot`` indexes the angle table (0, 270, 180, 90) degrees; the selected
    float32 homogeneous rotation matrix is multiplied with ``i_T_w``.
    """
    angle = np.deg2rad((0, 270, 180, 90)[rot])
    c, s = np.cos(angle), np.sin(angle)
    in_plane = np.array(
        [
            [c, -s, 0.0, 0.0],
            [s, c, 0.0, 0.0],
            [0.0, 0.0, 1.0, 0.0],
            [0.0, 0.0, 0.0, 1.0],
        ],
        dtype=np.float32,
    )
    return np.dot(in_plane, i_T_w)
|
|
|
|
|
def scale_intrinsics(K, scales):
    """Divide the first two rows of K by ``scales[0]`` and ``scales[1]``.

    Implemented as a left-multiplication of ``K`` with the diagonal matrix
    diag(1 / scales[0], 1 / scales[1], 1).
    """
    inv_x = 1.0 / scales[0]
    inv_y = 1.0 / scales[1]
    scaling = np.diag([inv_x, inv_y, 1.0])
    return np.dot(scaling, K)
|
|
|
|
|
def to_homogeneous(points):
    """Append a column of ones along the last axis of ``points``."""
    ones_column = np.ones_like(points[:, :1])
    return np.concatenate([points, ones_column], axis=-1)
|
|
|
|
|
def compute_epipolar_error(kpts0, kpts1, T_0to1, K0, K1):
    """Compute a symmetric epipolar error for each keypoint pair.

    Inputs
        kpts0, kpts1: N x 2 pixel coordinates of corresponding points.
        T_0to1: 4 x 4 transform; its rotation and translation define the
            essential matrix E = [t]_x R.
        K0, K1: 3 x 3 camera intrinsics used to normalize the keypoints.
    Returns
        Length-N array: squared epipolar constraint value scaled by the sum
        of the inverse squared norms of the first two components of the
        epipolar lines in both images.
    """
    # Normalize with the principal point and focal lengths, then lift to
    # homogeneous coordinates.
    center0, focal0 = K0[[0, 1], [2, 2]], K0[[0, 1], [0, 1]]
    center1, focal1 = K1[[0, 1], [2, 2]], K1[[0, 1], [0, 1]]
    pts0 = to_homogeneous((kpts0 - center0[None]) / focal0[None])
    pts1 = to_homogeneous((kpts1 - center1[None]) / focal1[None])

    tx, ty, tz = T_0to1[:3, 3]
    skew = np.array([[0, -tz, ty], [tz, 0, -tx], [-ty, tx, 0]])
    E = skew @ T_0to1[:3, :3]

    lines_in_1 = pts0 @ E.T
    lines_in_0 = pts1 @ E
    residual = np.sum(pts1 * lines_in_1, -1)
    weight = 1.0 / (lines_in_1[:, 0] ** 2 + lines_in_1[:, 1] ** 2) + 1.0 / (
        lines_in_0[:, 0] ** 2 + lines_in_0[:, 1] ** 2
    )
    return residual**2 * weight
|
|
|
|
|
def angle_error_mat(R1, R2):
    """Return the angle, in degrees, of the relative rotation ``R1.T @ R2``."""
    relative = np.dot(R1.T, R2)
    # Clip to guard arccos against values slightly outside [-1, 1].
    cos_angle = np.clip((np.trace(relative) - 1) / 2, -1.0, 1.0)
    return np.rad2deg(np.abs(np.arccos(cos_angle)))
|
|
|
|
|
def angle_error_vec(v1, v2):
    """Return the angle, in degrees, between the vectors ``v1`` and ``v2``."""
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    # Clip to guard arccos against values slightly outside [-1, 1].
    cos_angle = np.clip(np.dot(v1, v2) / norm_product, -1.0, 1.0)
    return np.rad2deg(np.arccos(cos_angle))
|
|
|
|
|
def compute_pose_error(T_0to1, R, t):
    """Compute translation and rotation angular errors against ``T_0to1``.

    Inputs
        T_0to1: Reference transform; its upper-left 3 x 3 block and the first
            three entries of its last column are the reference R and t.
        R, t: Estimated rotation matrix and translation vector.
    Returns
        (error_t, error_R) in degrees. The translation error is folded with
        ``min(e, 180 - e)`` so that a flipped direction is not penalized.
    """
    rotation_ref = T_0to1[:3, :3]
    translation_ref = T_0to1[:3, 3]

    raw_t_error = angle_error_vec(t, translation_ref)
    error_t = np.minimum(raw_t_error, 180 - raw_t_error)
    error_R = angle_error_mat(R, rotation_ref)
    return error_t, error_R
|
|
|
|
|
def pose_auc(errors, thresholds):
    """Compute the area under the recall-vs-error curve for each threshold.

    Inputs
        errors: Sequence (list, tuple or array) of per-sample errors.
        thresholds: Iterable of error thresholds.
    Returns
        List with one value per threshold: the integral of the cumulative
        recall curve from 0 to the threshold, divided by the threshold.
    """
    errors = np.sort(np.asarray(errors))
    recall = (np.arange(len(errors)) + 1) / len(errors)
    # Start the curve at (error=0, recall=0).
    errors = np.r_[0.0, errors]
    recall = np.r_[0.0, recall]

    # ``np.trapz`` is deprecated since NumPy 2.0 in favour of
    # ``np.trapezoid``; prefer the new name and fall back on older releases.
    integrate = getattr(np, "trapezoid", None)
    if integrate is None:
        integrate = np.trapz

    aucs = []
    for t in thresholds:
        last_index = np.searchsorted(errors, t)
        # Extend the curve horizontally up to the threshold itself.
        r = np.r_[recall[:last_index], recall[last_index - 1]]
        e = np.r_[errors[:last_index], t]
        aucs.append(integrate(r, x=e) / t)
    return aucs
|
|
|
|
|
|
|
|
|
|
|
def plot_image_pair(imgs, dpi=100, size=6, pad=0.5):
    """Show two grayscale images side by side in a new matplotlib figure.

    Inputs
        imgs: Sequence of exactly two images.
        dpi: Resolution of the figure.
        size: Width per image in inches (height is 3/4 of it); None lets
            matplotlib choose the figure size.
        pad: Padding passed to ``plt.tight_layout``.
    """
    n = len(imgs)
    assert n == 2, "number of images must be two"

    figsize = None
    if size is not None:
        figsize = (size * n, size * 3 / 4)
    _, axes = plt.subplots(1, n, figsize=figsize, dpi=dpi)

    for axis, img in zip(axes, imgs):
        axis.imshow(img, cmap=plt.get_cmap("gray"), vmin=0, vmax=255)
        # Hide ticks and frame so only the image is visible.
        axis.get_yaxis().set_ticks([])
        axis.get_xaxis().set_ticks([])
        for spine in axis.spines.values():
            spine.set_visible(False)
    plt.tight_layout(pad=pad)
|
|
|
|
|
def plot_keypoints(kpts0, kpts1, color="w", ps=2):
    """Scatter ``kpts0`` / ``kpts1`` on the first two axes of the current figure.

    Inputs
        kpts0, kpts1: N x 2 arrays of (x, y) positions.
        color: Marker color.
        ps: Marker size.
    """
    axes = plt.gcf().axes
    for index, kpts in enumerate((kpts0, kpts1)):
        axes[index].scatter(kpts[:, 0], kpts[:, 1], c=color, s=ps)
|
|
|
|
|
def plot_matches(kpts0, kpts1, color, lw=1.5, ps=4):
    """Draw match lines between the first two axes of the current figure.

    Inputs
        kpts0, kpts1: N x 2 arrays of matched (x, y) positions, one per axes.
        color: Per-match colors, indexed like the keypoints.
        lw: Line width of the match lines.
        ps: Marker size of the match end points.
    """
    fig = plt.gcf()
    axes = fig.axes
    # Draw once so the data-to-display transforms are up to date.
    fig.canvas.draw()

    # Convert data coordinates of each axes into figure coordinates.
    to_figure = fig.transFigure.inverted()
    fig_pts0 = to_figure.transform(axes[0].transData.transform(kpts0))
    fig_pts1 = to_figure.transform(axes[1].transData.transform(kpts1))

    match_lines = []
    for i in range(len(kpts0)):
        match_lines.append(
            matplotlib.lines.Line2D(
                (fig_pts0[i, 0], fig_pts1[i, 0]),
                (fig_pts0[i, 1], fig_pts1[i, 1]),
                zorder=1,
                transform=fig.transFigure,
                c=color[i],
                linewidth=lw,
            )
        )
    # Replaces any lines previously attached to the figure.
    fig.lines = match_lines

    for axis, kpts in ((axes[0], kpts0), (axes[1], kpts1)):
        axis.scatter(kpts[:, 0], kpts[:, 1], c=color, s=ps)
|
|
|
|
|
def make_matching_plot(
    image0,
    image1,
    kpts0,
    kpts1,
    mkpts0,
    mkpts1,
    color,
    text,
    path,
    show_keypoints=False,
    fast_viz=False,
    opencv_display=False,
    opencv_title="matches",
    small_text=[],
):
    """Render a matching visualization of two images and save it to ``path``.

    Inputs
        image0, image1: The two images to show.
        kpts0, kpts1: All keypoints (drawn only if ``show_keypoints``).
        mkpts0, mkpts1: Matched keypoints, connected by lines.
        color: Per-match colors.
        text: Lines of text placed at the top-left corner.
        path: Output file path.
        show_keypoints: Also draw the unmatched keypoints.
        fast_viz: Delegate to ``make_matching_plot_fast`` (OpenCV drawing)
            instead of matplotlib; nothing is returned in either case.
        opencv_display, opencv_title: Forwarded to the fast path only.
        small_text: Lines of small text placed at the bottom-left corner
            (the default list is only read, never modified).
    """
    if fast_viz:
        make_matching_plot_fast(
            image0,
            image1,
            kpts0,
            kpts1,
            mkpts0,
            mkpts1,
            color,
            text,
            path,
            show_keypoints,
            10,
            opencv_display,
            opencv_title,
            small_text,
        )
        return

    plot_image_pair([image0, image1])
    if show_keypoints:
        # Larger black dots first, smaller white dots on top.
        for kp_color, kp_size in (("k", 4), ("w", 2)):
            plot_keypoints(kpts0, kpts1, color=kp_color, ps=kp_size)
    plot_matches(mkpts0, mkpts1, color)

    fig = plt.gcf()
    # (image corner under the text, y position, lines, font size, v-align)
    overlays = (
        (image0[:100, :150], 0.99, text, 15, "top"),
        (image0[-100:, :150], 0.01, small_text, 5, "bottom"),
    )
    for corner, y_pos, lines, font_size, v_align in overlays:
        # Black text on bright corners, white text otherwise.
        txt_color = "k" if corner.mean() > 200 else "w"
        fig.text(
            0.01,
            y_pos,
            "\n".join(lines),
            transform=fig.axes[0].transAxes,
            fontsize=font_size,
            va=v_align,
            ha="left",
            color=txt_color,
        )

    plt.savefig(str(path), bbox_inches="tight", pad_inches=0)
    plt.close()
|
|
|
|
|
def make_matching_plot_fast(
    image0,
    image1,
    kpts0,
    kpts1,
    mkpts0,
    mkpts1,
    color,
    text,
    path=None,
    show_keypoints=False,
    margin=10,
    opencv_display=False,
    opencv_title="",
    small_text=[],
):
    """Draw a side-by-side matching visualization with OpenCV primitives.

    Inputs
        image0, image1: Two single-channel (H x W) images.
        kpts0, kpts1: All keypoints (drawn only if ``show_keypoints``).
        mkpts0, mkpts1: Matched keypoints, connected by lines.
        color: N x (>=3) array of per-match colors in [0, 1]; the first three
            channels are scaled to 0-255 and their order is reversed.
        text: Lines of text drawn at the top-left corner.
        path: If not None, the result is written there with ``cv2.imwrite``.
        show_keypoints: Also draw the unmatched keypoints.
        margin: Width in pixels of the white gap between the two images.
        opencv_display: If True, show the result in a window.
        opencv_title: Title of that window.
        small_text: Lines of small text drawn at the bottom-left corner
            (the default list is only read, never modified).
    Returns
        The H x W x 3 uint8 visualization.
    """
    H0, W0 = image0.shape
    H1, W1 = image1.shape
    H, W = max(H0, H1), W0 + W1 + margin

    # White canvas with both images pasted in, then replicated to 3 channels.
    canvas = np.full((H, W), 255, np.uint8)
    canvas[:H0, :W0] = image0
    canvas[:H1, W0 + margin :] = image1
    out = np.stack([canvas] * 3, -1)

    if show_keypoints:
        kpts0 = np.round(kpts0).astype(int)
        kpts1 = np.round(kpts1).astype(int)
        white = (255, 255, 255)
        black = (0, 0, 0)
        for x, y in kpts0:
            cv2.circle(out, (x, y), 2, black, -1, lineType=cv2.LINE_AA)
            cv2.circle(out, (x, y), 1, white, -1, lineType=cv2.LINE_AA)
        for x, y in kpts1:
            cv2.circle(out, (x + margin + W0, y), 2, black, -1, lineType=cv2.LINE_AA)
            cv2.circle(out, (x + margin + W0, y), 1, white, -1, lineType=cv2.LINE_AA)

    mkpts0 = np.round(mkpts0).astype(int)
    mkpts1 = np.round(mkpts1).astype(int)
    color = (np.array(color[:, :3]) * 255).astype(int)[:, ::-1]
    for (x0, y0), (x1, y1), c in zip(mkpts0, mkpts1, color):
        c = c.tolist()
        left_pt = (x0, y0)
        right_pt = (x1 + margin + W0, y1)
        cv2.line(
            out,
            left_pt,
            right_pt,
            color=c,
            thickness=1,
            lineType=cv2.LINE_AA,
        )
        # Mark the line end points.
        cv2.circle(out, left_pt, 2, c, -1, lineType=cv2.LINE_AA)
        cv2.circle(out, right_pt, 2, c, -1, lineType=cv2.LINE_AA)

    # Scale text with the image height, capped at 2x.
    sc = min(H / 640.0, 2.0)
    txt_color_fg = (255, 255, 255)
    txt_color_bg = (0, 0, 0)

    def draw_outlined(label, origin, font_scale):
        # Thick dark pass first, thin light pass on top for readability.
        for ink, thickness in ((txt_color_bg, 2), (txt_color_fg, 1)):
            cv2.putText(
                out,
                label,
                origin,
                cv2.FONT_HERSHEY_DUPLEX,
                font_scale,
                ink,
                thickness,
                cv2.LINE_AA,
            )

    # Big text, top-left, one line per entry.
    Ht = int(30 * sc)
    for i, t in enumerate(text):
        draw_outlined(t, (int(8 * sc), Ht * (i + 1)), 1.0 * sc)

    # Small text, bottom-left, stacked upwards from the last entry.
    Ht = int(18 * sc)
    for i, t in enumerate(reversed(small_text)):
        draw_outlined(t, (int(8 * sc), int(H - Ht * (i + 0.6))), 0.5 * sc)

    if path is not None:
        cv2.imwrite(str(path), out)

    if opencv_display:
        cv2.imshow(opencv_title, out)
        cv2.waitKey(1)

    return out
|
|
|
|
|
def error_colormap(x):
    """Map values to RGBA colors, clipped to [0, 1].

    The channels are (2 - 2x, 2x, 0, 1) stacked along a new last axis, so
    0 maps to red, 0.5 to yellow and 1 to green.
    """
    red = 2 - x * 2
    green = x * 2
    blue = np.zeros_like(x)
    alpha = np.ones_like(x)
    rgba = np.stack([red, green, blue, alpha], -1)
    return np.clip(rgba, 0, 1)
|
|