Spaces:

Realcat
/

image-matching-webui

Running

image-matching-webui / third_party /SuperGluePretrainedNetwork /match_pairs.py

Vincentqyw

fix: roma

c74a070 over 1 year ago

18.9 kB

	#! /usr/bin/env python3
	#
	# %BANNER_BEGIN%
	# ---------------------------------------------------------------------
	# %COPYRIGHT_BEGIN%
	#
	# Magic Leap, Inc. ("COMPANY") CONFIDENTIAL
	#
	# Unpublished Copyright (c) 2020
	# Magic Leap, Inc., All Rights Reserved.
	#
	# NOTICE: All information contained herein is, and remains the property
	# of COMPANY. The intellectual and technical concepts contained herein
	# are proprietary to COMPANY and may be covered by U.S. and Foreign
	# Patents, patents in process, and are protected by trade secret or
	# copyright law. Dissemination of this information or reproduction of
	# this material is strictly forbidden unless prior written permission is
	# obtained from COMPANY. Access to the source code contained herein is
	# hereby forbidden to anyone except current COMPANY employees, managers
	# or contractors who have executed Confidentiality and Non-disclosure
	# agreements explicitly covering such access.
	#
	# The copyright notice above does not evidence any actual or intended
	# publication or disclosure of this source code, which includes
	# information that is confidential and/or proprietary, and is a trade
	# secret, of COMPANY. ANY REPRODUCTION, MODIFICATION, DISTRIBUTION,
	# PUBLIC PERFORMANCE, OR PUBLIC DISPLAY OF OR THROUGH USE OF THIS
	# SOURCE CODE WITHOUT THE EXPRESS WRITTEN CONSENT OF COMPANY IS
	# STRICTLY PROHIBITED, AND IN VIOLATION OF APPLICABLE LAWS AND
	# INTERNATIONAL TREATIES. THE RECEIPT OR POSSESSION OF THIS SOURCE
	# CODE AND/OR RELATED INFORMATION DOES NOT CONVEY OR IMPLY ANY RIGHTS
	# TO REPRODUCE, DISCLOSE OR DISTRIBUTE ITS CONTENTS, OR TO MANUFACTURE,
	# USE, OR SELL ANYTHING THAT IT MAY DESCRIBE, IN WHOLE OR IN PART.
	#
	# %COPYRIGHT_END%
	# ----------------------------------------------------------------------
	# %AUTHORS_BEGIN%
	#
	# Originating Authors: Paul-Edouard Sarlin
	# Daniel DeTone
	# Tomasz Malisiewicz
	#
	# %AUTHORS_END%
	# --------------------------------------------------------------------*/
	# %BANNER_END%

	from pathlib import Path
	import argparse
	import random
	import numpy as np
	import matplotlib.cm as cm
	import torch


	from models.matching import Matching
	from models.utils import (
	compute_pose_error,
	compute_epipolar_error,
	estimate_pose,
	make_matching_plot,
	error_colormap,
	AverageTimer,
	pose_auc,
	read_image,
	rotate_intrinsics,
	rotate_pose_inplane,
	scale_intrinsics,
	)

	# This script only runs inference, so autograd is switched off globally.
	torch.set_grad_enabled(False)


	if __name__ == "__main__":
	    # Command-line driver: match every image pair listed in --input_pairs with
	    # the SuperPoint + SuperGlue pipeline, write the matches to .npz files and,
	    # on request, evaluate them against ground-truth poses (--eval) and render
	    # match / evaluation plots (--viz).
	    parser = argparse.ArgumentParser(
	        description="Image pair matching and pose evaluation with SuperGlue",
	        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	    )

	    # --- Input / output locations -------------------------------------------
	    parser.add_argument(
	        "--input_pairs",
	        type=str,
	        default="assets/scannet_sample_pairs_with_gt.txt",
	        help="Path to the list of image pairs",
	    )
	    parser.add_argument(
	        "--input_dir",
	        type=str,
	        default="assets/scannet_sample_images/",
	        help="Path to the directory that contains the images",
	    )
	    parser.add_argument(
	        "--output_dir",
	        type=str,
	        default="dump_match_pairs/",
	        help="Path to the directory in which the .npz results and optionally, "
	        "the visualization images are written",
	    )

	    # --- Pair selection and image preprocessing -----------------------------
	    parser.add_argument(
	        "--max_length", type=int, default=-1, help="Maximum number of pairs to evaluate"
	    )
	    parser.add_argument(
	        "--resize",
	        type=int,
	        nargs="+",
	        default=[640, 480],
	        help="Resize the input image before running inference. If two numbers, "
	        "resize to the exact dimensions, if one number, resize the max "
	        "dimension, if -1, do not resize",
	    )
	    parser.add_argument(
	        "--resize_float",
	        action="store_true",
	        help="Resize the image after casting uint8 to float",
	    )

	    # --- SuperPoint / SuperGlue hyper-parameters ----------------------------
	    parser.add_argument(
	        "--superglue",
	        # A list (not a set) keeps the order shown in --help deterministic.
	        choices=["indoor", "outdoor"],
	        default="indoor",
	        help="SuperGlue weights",
	    )
	    parser.add_argument(
	        "--max_keypoints",
	        type=int,
	        default=1024,
	        help="Maximum number of keypoints detected by Superpoint"
	        " ('-1' keeps all keypoints)",
	    )
	    parser.add_argument(
	        "--keypoint_threshold",
	        type=float,
	        default=0.005,
	        help="SuperPoint keypoint detector confidence threshold",
	    )
	    parser.add_argument(
	        "--nms_radius",
	        type=int,
	        default=4,
	        help="SuperPoint Non Maximum Suppression (NMS) radius" " (Must be positive)",
	    )
	    parser.add_argument(
	        "--sinkhorn_iterations",
	        type=int,
	        default=20,
	        help="Number of Sinkhorn iterations performed by SuperGlue",
	    )
	    parser.add_argument(
	        "--match_threshold", type=float, default=0.2, help="SuperGlue match threshold"
	    )

	    # --- Evaluation, visualization and run-time switches --------------------
	    parser.add_argument(
	        "--viz", action="store_true", help="Visualize the matches and dump the plots"
	    )
	    parser.add_argument(
	        "--eval",
	        action="store_true",
	        help="Perform the evaluation" " (requires ground truth pose and intrinsics)",
	    )
	    parser.add_argument(
	        "--fast_viz",
	        action="store_true",
	        help="Use faster image visualization with OpenCV instead of Matplotlib",
	    )
	    parser.add_argument(
	        "--cache",
	        action="store_true",
	        help="Skip the pair if output .npz files are already found",
	    )
	    parser.add_argument(
	        "--show_keypoints",
	        action="store_true",
	        help="Plot the keypoints in addition to the matches",
	    )
	    parser.add_argument(
	        "--viz_extension",
	        type=str,
	        default="png",
	        choices=["png", "pdf"],
	        help="Visualization file extension. Use pdf for highest-quality.",
	    )
	    parser.add_argument(
	        "--opencv_display",
	        action="store_true",
	        help="Visualize via OpenCV before saving output images",
	    )
	    parser.add_argument(
	        "--shuffle",
	        action="store_true",
	        help="Shuffle ordering of pairs before processing",
	    )
	    parser.add_argument(
	        "--force_cpu", action="store_true", help="Force pytorch to run in CPU mode."
	    )

	    opt = parser.parse_args()
	    print(opt)

	    # Reject inconsistent flag combinations. parser.error() is used rather than
	    # assert so that the checks are not stripped when running under `python -O`.
	    if opt.opencv_display and not opt.viz:
	        parser.error("Must use --viz with --opencv_display")
	    if opt.opencv_display and not opt.fast_viz:
	        parser.error("Cannot use --opencv_display without --fast_viz")
	    if opt.fast_viz and not opt.viz:
	        parser.error("Must use --viz with --fast_viz")
	    if opt.fast_viz and opt.viz_extension == "pdf":
	        parser.error("Cannot use pdf extension with --fast_viz")

	    # Normalize --resize: "W -1" is treated the same as the single value "W".
	    if len(opt.resize) == 2 and opt.resize[1] == -1:
	        opt.resize = opt.resize[0:1]
	    if len(opt.resize) == 2:
	        print("Will resize to {}x{} (WxH)".format(opt.resize[0], opt.resize[1]))
	    elif len(opt.resize) == 1 and opt.resize[0] > 0:
	        print("Will resize max dimension to {}".format(opt.resize[0]))
	    elif len(opt.resize) == 1:
	        print("Will not resize images")
	    else:
	        raise ValueError("Cannot specify more than two integers for --resize")

	    # Every non-blank line of the pairs file is one whitespace-separated record.
	    # Blank lines are skipped; they would otherwise yield an empty record that
	    # cannot be unpacked into two image names further down.
	    with open(opt.input_pairs, "r") as f:
	        pairs = [line.split() for line in f if line.strip()]

	    if opt.max_length > -1:
	        # Slicing already clamps to len(pairs).
	        pairs = pairs[: opt.max_length]

	    if opt.shuffle:
	        # Fixed seed so that the shuffled order is reproducible between runs.
	        random.Random(0).shuffle(pairs)

	    if opt.eval:
	        # A ground-truth record has 38 fields: 2 image names, 2 rotations,
	        # 9 + 9 intrinsics entries and a 16-entry (4x4) relative pose.
	        if not all(len(p) == 38 for p in pairs):
	            raise ValueError(
	                "All pairs should have ground truth info for evaluation. "
	                'File "{}" needs 38 valid entries per row'.format(opt.input_pairs)
	            )

	    # Load the SuperPoint and SuperGlue models.
	    device = "cuda" if torch.cuda.is_available() and not opt.force_cpu else "cpu"
	    print('Running inference on device "{}"'.format(device))
	    config = {
	        "superpoint": {
	            "nms_radius": opt.nms_radius,
	            "keypoint_threshold": opt.keypoint_threshold,
	            "max_keypoints": opt.max_keypoints,
	        },
	        "superglue": {
	            "weights": opt.superglue,
	            "sinkhorn_iterations": opt.sinkhorn_iterations,
	            "match_threshold": opt.match_threshold,
	        },
	    }
	    matching = Matching(config).eval().to(device)

	    # Create the output directories if they do not exist already.
	    input_dir = Path(opt.input_dir)
	    print('Looking for data in directory "{}"'.format(input_dir))
	    output_dir = Path(opt.output_dir)
	    output_dir.mkdir(exist_ok=True, parents=True)
	    print('Will write matches to directory "{}"'.format(output_dir))
	    if opt.eval:
	        print("Will write evaluation results", 'to directory "{}"'.format(output_dir))
	    if opt.viz:
	        print("Will write visualization images to", 'directory "{}"'.format(output_dir))

	    timer = AverageTimer(newline=True)
	    for i, pair in enumerate(pairs):
	        name0, name1 = pair[:2]
	        stem0, stem1 = Path(name0).stem, Path(name1).stem
	        matches_path = output_dir / "{}_{}_matches.npz".format(stem0, stem1)
	        eval_path = output_dir / "{}_{}_evaluation.npz".format(stem0, stem1)
	        viz_path = output_dir / "{}_{}_matches.{}".format(
	            stem0, stem1, opt.viz_extension
	        )
	        viz_eval_path = output_dir / "{}_{}_evaluation.{}".format(
	            stem0, stem1, opt.viz_extension
	        )

	        # Handle --cache logic: every stage whose output file already exists is
	        # skipped, and the values later stages need are restored from disk.
	        do_match = True
	        do_eval = opt.eval
	        do_viz = opt.viz
	        do_viz_eval = opt.eval and opt.viz
	        if opt.cache:
	            if matches_path.exists():
	                # The arrays are read inside the `with` so the archive is closed
	                # again, and inside the `try` because .npz members are only
	                # decoded on access. KeyboardInterrupt is no longer swallowed.
	                try:
	                    with np.load(matches_path) as results:
	                        kpts0, kpts1 = results["keypoints0"], results["keypoints1"]
	                        matches, conf = results["matches"], results["match_confidence"]
	                except Exception as err:
	                    raise IOError(
	                        "Cannot load matches .npz file: %s" % matches_path
	                    ) from err
	                do_match = False
	            if opt.eval and eval_path.exists():
	                try:
	                    with np.load(eval_path) as results:
	                        err_R, err_t = results["error_R"], results["error_t"]
	                        precision = results["precision"]
	                        matching_score = results["matching_score"]
	                        num_correct = results["num_correct"]
	                        epi_errs = results["epipolar_errors"]
	                except Exception as err:
	                    raise IOError("Cannot load eval .npz file: %s" % eval_path) from err
	                do_eval = False
	            if opt.viz and viz_path.exists():
	                do_viz = False
	            if opt.viz and opt.eval and viz_eval_path.exists():
	                do_viz_eval = False
	            timer.update("load_cache")

	        if not (do_match or do_eval or do_viz or do_viz_eval):
	            # Everything for this pair is already on disk.
	            timer.print("Finished pair {:5} of {:5}".format(i, len(pairs)))
	            continue

	        # If a rotation integer is provided (e.g. from EXIF data), use it:
	        if len(pair) >= 5:
	            rot0, rot1 = int(pair[2]), int(pair[3])
	        else:
	            rot0, rot1 = 0, 0

	        # Load the image pair.
	        image0, inp0, scales0 = read_image(
	            input_dir / name0, device, opt.resize, rot0, opt.resize_float
	        )
	        image1, inp1, scales1 = read_image(
	            input_dir / name1, device, opt.resize, rot1, opt.resize_float
	        )
	        if image0 is None or image1 is None:
	            print(
	                "Problem reading image pair: {} {}".format(
	                    input_dir / name0, input_dir / name1
	                )
	            )
	            # SystemExit is raised directly: the `exit` helper is injected by
	            # the `site` module and is not guaranteed to exist.
	            raise SystemExit(1)
	        timer.update("load_image")

	        if do_match:
	            # Perform the matching.
	            pred = matching({"image0": inp0, "image1": inp1})
	            # Take element 0 of every output and convert it to a NumPy array.
	            pred = {k: v[0].cpu().numpy() for k, v in pred.items()}
	            kpts0, kpts1 = pred["keypoints0"], pred["keypoints1"]
	            matches, conf = pred["matches0"], pred["matching_scores0"]
	            timer.update("matcher")

	            # Write the matches to disk.
	            out_matches = {
	                "keypoints0": kpts0,
	                "keypoints1": kpts1,
	                "matches": matches,
	                "match_confidence": conf,
	            }
	            np.savez(str(matches_path), **out_matches)

	        # Keep the matching keypoints: entries of `matches` greater than -1 are
	        # used as indices into kpts1.
	        valid = matches > -1
	        mkpts0 = kpts0[valid]
	        mkpts1 = kpts1[matches[valid]]
	        mconf = conf[valid]

	        if do_eval:
	            # Estimate the pose and compute the pose error.
	            # Internal invariant only: the record length was validated right
	            # after the pairs file was read.
	            assert len(pair) == 38, "Pair does not have ground truth info"
	            K0 = np.array(pair[4:13]).astype(float).reshape(3, 3)
	            K1 = np.array(pair[13:22]).astype(float).reshape(3, 3)
	            T_0to1 = np.array(pair[22:]).astype(float).reshape(4, 4)

	            # Scale the intrinsics to resized image.
	            K0 = scale_intrinsics(K0, scales0)
	            K1 = scale_intrinsics(K1, scales1)

	            # Update the intrinsics + extrinsics if EXIF rotation was found.
	            if rot0 != 0 or rot1 != 0:
	                cam0_T_w = np.eye(4)
	                cam1_T_w = T_0to1
	                if rot0 != 0:
	                    K0 = rotate_intrinsics(K0, image0.shape, rot0)
	                    cam0_T_w = rotate_pose_inplane(cam0_T_w, rot0)
	                if rot1 != 0:
	                    K1 = rotate_intrinsics(K1, image1.shape, rot1)
	                    cam1_T_w = rotate_pose_inplane(cam1_T_w, rot1)
	                T_0to1 = cam1_T_w @ np.linalg.inv(cam0_T_w)

	            epi_errs = compute_epipolar_error(mkpts0, mkpts1, T_0to1, K0, K1)
	            # A match counts as correct when its epipolar error is below 5e-4.
	            correct = epi_errs < 5e-4
	            num_correct = np.sum(correct)
	            # Guard the ratios against empty match / keypoint sets.
	            precision = np.mean(correct) if len(correct) > 0 else 0
	            matching_score = num_correct / len(kpts0) if len(kpts0) > 0 else 0

	            thresh = 1.0  # In pixels relative to resized image size.
	            ret = estimate_pose(mkpts0, mkpts1, K0, K1, thresh)
	            if ret is None:
	                # No pose could be estimated; reported as "FAIL" in the plots.
	                err_t, err_R = np.inf, np.inf
	            else:
	                R, t, _ = ret
	                err_t, err_R = compute_pose_error(T_0to1, R, t)

	            # Write the evaluation results to disk.
	            out_eval = {
	                "error_t": err_t,
	                "error_R": err_R,
	                "precision": precision,
	                "matching_score": matching_score,
	                "num_correct": num_correct,
	                "epipolar_errors": epi_errs,
	            }
	            np.savez(str(eval_path), **out_eval)
	            timer.update("eval")

	        if do_viz or do_viz_eval:
	            # Extra parameter info shown on both kinds of plot.
	            k_thresh = matching.superpoint.config["keypoint_threshold"]
	            m_thresh = matching.superglue.config["match_threshold"]
	            small_text = [
	                "Keypoint Threshold: {:.4f}".format(k_thresh),
	                "Match Threshold: {:.2f}".format(m_thresh),
	                "Image Pair: {}:{}".format(stem0, stem1),
	            ]

	        if do_viz:
	            # Visualize the matches, colored by match confidence.
	            color = cm.jet(mconf)
	            text = [
	                "SuperGlue",
	                "Keypoints: {}:{}".format(len(kpts0), len(kpts1)),
	                "Matches: {}".format(len(mkpts0)),
	            ]
	            if rot0 != 0 or rot1 != 0:
	                text.append("Rotation: {}:{}".format(rot0, rot1))

	            make_matching_plot(
	                image0,
	                image1,
	                kpts0,
	                kpts1,
	                mkpts0,
	                mkpts1,
	                color,
	                text,
	                viz_path,
	                opt.show_keypoints,
	                opt.fast_viz,
	                opt.opencv_display,
	                "Matches",
	                small_text,
	            )

	            timer.update("viz_match")

	        if do_viz_eval:
	            # Visualize the evaluation results for the image pair. Epipolar
	            # errors are scaled so that 1e-3 and above saturate at 1, then
	            # inverted before being passed to the colormap.
	            color = np.clip((epi_errs - 0) / (1e-3 - 0), 0, 1)
	            color = error_colormap(1 - color)
	            # Plain-text labels for --fast_viz, symbol / mathtext labels otherwise.
	            deg, delta = " deg", "Delta "
	            if not opt.fast_viz:
	                deg, delta = "°", "$\\Delta$"
	            e_t = "FAIL" if np.isinf(err_t) else "{:.1f}{}".format(err_t, deg)
	            e_R = "FAIL" if np.isinf(err_R) else "{:.1f}{}".format(err_R, deg)
	            text = [
	                "SuperGlue",
	                "{}R: {}".format(delta, e_R),
	                "{}t: {}".format(delta, e_t),
	                "inliers: {}/{}".format(num_correct, (matches > -1).sum()),
	            ]
	            if rot0 != 0 or rot1 != 0:
	                text.append("Rotation: {}:{}".format(rot0, rot1))

	            make_matching_plot(
	                image0,
	                image1,
	                kpts0,
	                kpts1,
	                mkpts0,
	                mkpts1,
	                color,
	                text,
	                viz_eval_path,
	                opt.show_keypoints,
	                opt.fast_viz,
	                opt.opencv_display,
	                "Relative Pose",
	                small_text,
	            )

	            timer.update("viz_eval")

	        timer.print("Finished pair {:5} of {:5}".format(i, len(pairs)))

	    if opt.eval:
	        # Collate the results into a final table and print to terminal.
	        pose_errors = []
	        precisions = []
	        matching_scores = []
	        for pair in pairs:
	            name0, name1 = pair[:2]
	            stem0, stem1 = Path(name0).stem, Path(name1).stem
	            eval_path = output_dir / "{}_{}_evaluation.npz".format(stem0, stem1)
	            # Context manager so one archive handle is not leaked per pair.
	            with np.load(eval_path) as results:
	                # The pose error of a pair is the larger of its two errors.
	                pose_errors.append(
	                    np.maximum(results["error_t"], results["error_R"])
	                )
	                precisions.append(results["precision"])
	                matching_scores.append(results["matching_score"])
	        thresholds = [5, 10, 20]
	        aucs = pose_auc(pose_errors, thresholds)
	        # Everything is reported as a percentage.
	        aucs = [100.0 * yy for yy in aucs]
	        prec = 100.0 * np.mean(precisions)
	        ms = 100.0 * np.mean(matching_scores)
	        print("Evaluation Results (mean over {} pairs):".format(len(pairs)))
	        print("AUC@5\t AUC@10\t AUC@20\t Prec\t MScore\t")
	        print(
	            "{:.2f}\t {:.2f}\t {:.2f}\t {:.2f}\t {:.2f}\t".format(
	                aucs[0], aucs[1], aucs[2], prec, ms
	            )
	        )