|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from pathlib import Path |
|
import argparse |
|
import cv2 |
|
import matplotlib.cm as cm |
|
import torch |
|
|
|
from models.matching import Matching |
|
from models.utils import ( |
|
AverageTimer, |
|
VideoStreamer, |
|
make_matching_plot_fast, |
|
frame2tensor, |
|
) |
|
|
|
# This script only runs inference, so gradient tracking is switched off globally.
torch.set_grad_enabled(False)

# SuperPoint outputs that are carried over from the anchor frame to the matcher.
_FEATURE_KEYS = ("keypoints", "scores", "descriptors")

_WINDOW_NAME = "SuperGlue matches"

# NOTE: the wording follows what the key handlers actually do: 'e' and 'd'
# lower the respective threshold, 'r' and 'f' raise it.
KEYBOARD_HELP = (
    "==> Keyboard control:\n"
    "\tn: select the current frame as the anchor\n"
    "\te/r: decrease/increase the keypoint confidence threshold\n"
    "\td/f: decrease/increase the match filtering threshold\n"
    "\tk: toggle the visualization of keypoints\n"
    "\tq: quit"
)


def build_parser():
    """Return the command-line parser for the SuperGlue demo."""
    parser = argparse.ArgumentParser(
        description="SuperGlue demo",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--input",
        type=str,
        default="0",
        help="ID of a USB webcam, URL of an IP camera, "
        "or path to an image directory or movie file",
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default=None,
        help="Directory where to write output frames (If None, no output)",
    )

    parser.add_argument(
        "--image_glob",
        type=str,
        nargs="+",
        default=["*.png", "*.jpg", "*.jpeg"],
        help="Glob if a directory of images is specified",
    )
    parser.add_argument(
        "--skip",
        type=int,
        default=1,
        help="Images to skip if input is a movie or directory",
    )
    parser.add_argument(
        "--max_length",
        type=int,
        default=1000000,
        help="Maximum length if input is a movie or directory",
    )
    parser.add_argument(
        "--resize",
        type=int,
        nargs="+",
        default=[640, 480],
        help="Resize the input image before running inference. If two numbers, "
        "resize to the exact dimensions, if one number, resize the max "
        "dimension, if -1, do not resize",
    )

    parser.add_argument(
        "--superglue",
        # A list (not a set) keeps the order shown in --help deterministic.
        choices=["indoor", "outdoor"],
        default="indoor",
        help="SuperGlue weights",
    )
    parser.add_argument(
        "--max_keypoints",
        type=int,
        default=-1,
        help="Maximum number of keypoints detected by Superpoint"
        " ('-1' keeps all keypoints)",
    )
    parser.add_argument(
        "--keypoint_threshold",
        type=float,
        default=0.005,
        help="SuperPoint keypoint detector confidence threshold",
    )
    parser.add_argument(
        "--nms_radius",
        type=int,
        default=4,
        help="SuperPoint Non Maximum Suppression (NMS) radius" " (Must be positive)",
    )
    parser.add_argument(
        "--sinkhorn_iterations",
        type=int,
        default=20,
        help="Number of Sinkhorn iterations performed by SuperGlue",
    )
    parser.add_argument(
        "--match_threshold", type=float, default=0.2, help="SuperGlue match threshold"
    )

    parser.add_argument(
        "--show_keypoints", action="store_true", help="Show the detected keypoints"
    )
    parser.add_argument(
        "--no_display",
        action="store_true",
        help="Do not display images to screen. Useful if running remotely",
    )
    parser.add_argument(
        "--force_cpu", action="store_true", help="Force pytorch to run in CPU mode."
    )
    return parser


def normalize_resize(resize):
    """Validate the ``--resize`` values, report the chosen mode and return them.

    Args:
        resize: List of integers as parsed from ``--resize``.

    Returns:
        The list to hand to the video streamer. ``[W, -1]`` is collapsed to
        ``[W]``; every other accepted input is returned unchanged.

    Raises:
        ValueError: If the list does not hold exactly one or two integers.
    """
    if len(resize) == 2 and resize[1] == -1:
        resize = resize[0:1]

    if len(resize) == 2:
        print("Will resize to {}x{} (WxH)".format(resize[0], resize[1]))
    elif len(resize) == 1 and resize[0] > 0:
        print("Will resize max dimension to {}".format(resize[0]))
    elif len(resize) == 1:
        print("Will not resize images")
    else:
        raise ValueError("Cannot specify more than two integers for --resize")
    return resize


def _make_anchor(features, image_tensor, suffix=""):
    """Build the '...0' anchor entries from features stored under ``key + suffix``."""
    anchor = {key + "0": features[key + suffix] for key in _FEATURE_KEYS}
    anchor["image0"] = image_tensor
    return anchor


def _adjust_keypoint_threshold(superpoint_config, key):
    """Scale the keypoint threshold by -10% ('e') or +10% ('r'), clamped to [0.0001, 1]."""
    step = 0.1 * (-1 if key == "e" else 1)
    scaled = superpoint_config["keypoint_threshold"] * (1 + step)
    superpoint_config["keypoint_threshold"] = min(max(0.0001, scaled), 1)
    print(
        "\nChanged the keypoint threshold to {:.4f}".format(
            superpoint_config["keypoint_threshold"]
        )
    )


def _adjust_match_threshold(superglue_config, key):
    """Shift the match threshold by -0.05 ('d') or +0.05 ('f'), clamped to [0.05, 0.95]."""
    step = 0.05 * (-1 if key == "d" else 1)
    shifted = superglue_config["match_threshold"] + step
    superglue_config["match_threshold"] = min(max(0.05, shifted), 0.95)
    print(
        "\nChanged the match threshold to {:.2f}".format(
            superglue_config["match_threshold"]
        )
    )


def _run_demo(opt, matching, vs, device):
    """Match every streamed frame against the current anchor frame until done.

    Args:
        opt: Parsed command-line options.
        matching: The matching model; its ``superpoint`` / ``superglue``
            sub-module ``config`` dicts are edited live by the key handlers.
        vs: Frame source providing ``next_frame()`` and the counter ``i``.
        device: Device string passed to ``frame2tensor``.

    Raises:
        RuntimeError: If the very first frame cannot be read.
    """
    frame, ret = vs.next_frame()
    if not ret:
        # Explicit raise instead of `assert`, which disappears under `python -O`.
        raise RuntimeError(
            "Error when reading the first frame (try different --input?)"
        )

    # The first frame becomes the initial anchor (image "0" of every pair).
    frame_tensor = frame2tensor(frame, device)
    last_data = _make_anchor(matching.superpoint({"image": frame_tensor}), frame_tensor)
    last_frame = frame
    last_image_id = 0

    if opt.output_dir is not None:
        print("==> Will write outputs to {}".format(opt.output_dir))
        # parents=True so a nested, not-yet-existing output path also works.
        Path(opt.output_dir).mkdir(parents=True, exist_ok=True)

    if not opt.no_display:
        cv2.namedWindow(_WINDOW_NAME, cv2.WINDOW_NORMAL)
        cv2.resizeWindow(_WINDOW_NAME, 640 * 2, 480)
    else:
        print("Skipping visualization, will not show a GUI.")

    print(KEYBOARD_HELP)

    timer = AverageTimer()

    while True:
        frame, ret = vs.next_frame()
        if not ret:
            print("Finished demo_superglue.py")
            break
        timer.update("data")
        # Ids used for the on-screen label and the output file name.
        # presumably vs.i is the streamer's running frame counter - verify in VideoStreamer
        stem0, stem1 = last_image_id, vs.i - 1

        frame_tensor = frame2tensor(frame, device)
        pred = matching({**last_data, "image1": frame_tensor})
        kpts0 = last_data["keypoints0"][0].cpu().numpy()
        kpts1 = pred["keypoints1"][0].cpu().numpy()
        matches = pred["matches0"][0].cpu().numpy()
        confidence = pred["matching_scores0"][0].cpu().numpy()
        timer.update("forward")

        # matches[i] indexes into kpts1; entries of -1 are treated as unmatched.
        valid = matches > -1
        mkpts0 = kpts0[valid]
        mkpts1 = kpts1[matches[valid]]
        color = cm.jet(confidence[valid])
        text = [
            "SuperGlue",
            "Keypoints: {}:{}".format(len(kpts0), len(kpts1)),
            "Matches: {}".format(len(mkpts0)),
        ]
        k_thresh = matching.superpoint.config["keypoint_threshold"]
        m_thresh = matching.superglue.config["match_threshold"]
        small_text = [
            "Keypoint Threshold: {:.4f}".format(k_thresh),
            "Match Threshold: {:.2f}".format(m_thresh),
            "Image Pair: {:06}:{:06}".format(stem0, stem1),
        ]
        out = make_matching_plot_fast(
            last_frame,
            frame,
            kpts0,
            kpts1,
            mkpts0,
            mkpts1,
            color,
            text,
            path=None,
            show_keypoints=opt.show_keypoints,
            small_text=small_text,
        )

        if not opt.no_display:
            cv2.imshow(_WINDOW_NAME, out)
            # Masking to one byte keeps chr() valid when waitKey reports "no key".
            key = chr(cv2.waitKey(1) & 0xFF)
            if key == "q":
                # Streamer and window cleanup happen once, in main()'s finally.
                print("Exiting (via q) demo_superglue.py")
                break
            elif key == "n":
                # Promote the current frame (the "1" side of the pair) to anchor.
                last_data = _make_anchor(pred, frame_tensor, suffix="1")
                last_frame = frame
                last_image_id = vs.i - 1
            elif key in ["e", "r"]:
                _adjust_keypoint_threshold(matching.superpoint.config, key)
            elif key in ["d", "f"]:
                _adjust_match_threshold(matching.superglue.config, key)
            elif key == "k":
                opt.show_keypoints = not opt.show_keypoints

        timer.update("viz")
        timer.print()

        if opt.output_dir is not None:
            stem = "matches_{:06}_{:06}".format(stem0, stem1)
            out_file = str(Path(opt.output_dir, stem + ".png"))
            print("\nWriting image to {}".format(out_file))
            cv2.imwrite(out_file, out)


def main(argv=None):
    """Run the SuperGlue demo.

    Args:
        argv: Optional list of command-line arguments; ``None`` makes argparse
            read ``sys.argv[1:]``.

    Raises:
        ValueError: If ``--resize`` is given more than two integers.
        RuntimeError: If the first frame cannot be read from ``--input``.
    """
    opt = build_parser().parse_args(argv)
    print(opt)
    opt.resize = normalize_resize(opt.resize)

    device = "cuda" if torch.cuda.is_available() and not opt.force_cpu else "cpu"
    print('Running inference on device "{}"'.format(device))
    config = {
        "superpoint": {
            "nms_radius": opt.nms_radius,
            "keypoint_threshold": opt.keypoint_threshold,
            "max_keypoints": opt.max_keypoints,
        },
        "superglue": {
            "weights": opt.superglue,
            "sinkhorn_iterations": opt.sinkhorn_iterations,
            "match_threshold": opt.match_threshold,
        },
    }
    matching = Matching(config).eval().to(device)

    vs = VideoStreamer(opt.input, opt.resize, opt.skip, opt.image_glob, opt.max_length)
    try:
        _run_demo(opt, matching, vs, device)
    finally:
        # Runs on normal end, on 'q', and on errors / Ctrl-C alike.
        cv2.destroyAllWindows()
        vs.cleanup()


if __name__ == "__main__":
    main()
|
|