"""Feature extraction script: runs D2-Net on a list of images and saves keypoints, scores and descriptors."""
import argparse

import imageio
import numpy as np
import scipy
import scipy.io
import scipy.misc
import scipy.ndimage
import torch
from tqdm import tqdm

from lib.model_test import D2Net
from lib.pyramid import process_multiscale
from lib.utils import preprocess_image
# CUDA
# `use_cuda` is forwarded to the model constructor; `device` is where the
# input tensors are created. Both follow what PyTorch reports as available.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if use_cuda else "cpu")
# Argument parsing
# Builds the command-line interface and parses `sys.argv` into `args`.
# Only --image_list_file is mandatory; every other option has a default.
parser = argparse.ArgumentParser(description='Feature extraction script')

parser.add_argument(
    '--image_list_file', type=str, required=True,
    help='path to a file containing a list of images to process'
)

parser.add_argument(
    '--preprocessing', type=str, default='caffe',
    help='image preprocessing (caffe or torch)'
)
parser.add_argument(
    '--model_file', type=str, default='models/d2_tf.pth',
    help='path to the full model'
)

# Size limits applied to each image before it is fed to the network.
parser.add_argument(
    '--max_edge', type=int, default=1600,
    help='maximum image size at network input'
)
parser.add_argument(
    '--max_sum_edges', type=int, default=2800,
    help='maximum sum of image sizes at network input'
)

# The output file is written next to the image as <image path><extension>.
parser.add_argument(
    '--output_extension', type=str, default='.d2-net',
    help='extension for the output'
)
parser.add_argument(
    '--output_type', type=str, default='npz',
    help='output file type (npz or mat)'
)

# Boolean switches: multiscale is off unless requested, ReLU is on unless
# --no-relu is given.
parser.add_argument(
    '--multiscale', dest='multiscale', action='store_true',
    help='extract multiscale features'
)
parser.set_defaults(multiscale=False)

parser.add_argument(
    '--no-relu', dest='use_relu', action='store_false',
    help='remove ReLU after the dense feature extraction module'
)
parser.set_defaults(use_relu=True)

args = parser.parse_args()

# Echo the effective configuration so it ends up in the run log.
print(args)
# Creating CNN model
# The checkpoint path and the ReLU switch come from the command line;
# `use_cuda` is the availability flag computed at the top of the script.
model = D2Net(
    model_file=args.model_file,
    use_relu=args.use_relu,
    use_cuda=use_cuda
)
# Process the file
def _downscale(image, factor):
    """Return `image` scaled by `factor` along its first two axes, as float.

    `scipy.misc.imresize` was removed from SciPy in release 1.3, so it is
    only used when the installed SciPy still provides it (keeping results
    unchanged on legacy environments). Otherwise the image is resampled with
    first-order (linear) interpolation via `scipy.ndimage.zoom`.

    `image` is expected to be a 3-D array with the channel axis last; that
    axis is never rescaled.
    """
    legacy_imresize = getattr(scipy.misc, 'imresize', None)
    if legacy_imresize is not None:
        return legacy_imresize(image, factor).astype('float')
    return scipy.ndimage.zoom(
        image, (factor, factor, 1), order=1
    ).astype('float')


def _save_features(output_path, output_type, keypoints, scores, descriptors):
    """Write the extracted features to `output_path` as 'npz' or 'mat'.

    Raises:
        ValueError: if `output_type` is neither 'npz' nor 'mat'.
    """
    if output_type == 'npz':
        with open(output_path, 'wb') as output_file:
            np.savez(
                output_file,
                keypoints=keypoints,
                scores=scores,
                descriptors=descriptors
            )
    elif output_type == 'mat':
        with open(output_path, 'wb') as output_file:
            scipy.io.savemat(
                output_file,
                {
                    'keypoints': keypoints,
                    'scores': scores,
                    'descriptors': descriptors
                }
            )
    else:
        raise ValueError('Unknown output type.')


# Fail before any network inference is spent on an output we cannot write.
if args.output_type not in ('npz', 'mat'):
    raise ValueError('Unknown output type.')

with open(args.image_list_file, 'r') as f:
    # One image path per line; blank lines are ignored instead of being
    # handed to the image reader as empty paths.
    image_paths = [line.strip() for line in f if line.strip()]

for path in tqdm(image_paths, total=len(image_paths)):
    image = imageio.imread(path)
    if image.ndim == 2:
        # Single-channel image: replicate it into three identical channels.
        image = np.repeat(image[:, :, np.newaxis], 3, -1)

    # Shrink the image until both size limits hold. Only the two spatial
    # axes are measured; the channel axis does not count as an edge.
    resized_image = image
    if max(resized_image.shape[: 2]) > args.max_edge:
        resized_image = _downscale(
            resized_image,
            args.max_edge / max(resized_image.shape[: 2])
        )
    if sum(resized_image.shape[: 2]) > args.max_sum_edges:
        resized_image = _downscale(
            resized_image,
            args.max_sum_edges / sum(resized_image.shape[: 2])
        )

    # Per-axis factors that map resized coordinates back to the original
    # image; computed from the actual shapes, so rounding in the resize
    # step is accounted for.
    fact_i = image.shape[0] / resized_image.shape[0]
    fact_j = image.shape[1] / resized_image.shape[1]

    input_image = preprocess_image(
        resized_image,
        preprocessing=args.preprocessing
    )

    with torch.no_grad():
        # Add a leading batch axis of size one and move the data to `device`.
        batch = torch.tensor(
            input_image[np.newaxis, :, :, :].astype(np.float32),
            device=device
        )
        if args.multiscale:
            keypoints, scores, descriptors = process_multiscale(batch, model)
        else:
            # Single-scale extraction: restrict the pyramid to scale 1.
            keypoints, scores, descriptors = process_multiscale(
                batch, model, scales=[1]
            )

    # Input image coordinates
    # (assumes keypoints has one row per keypoint with columns i, j and a
    # third value that is left unscaled -- TODO confirm against lib.pyramid)
    keypoints[:, 0] *= fact_i
    keypoints[:, 1] *= fact_j
    # i, j -> u, v
    keypoints = keypoints[:, [1, 0, 2]]

    _save_features(
        path + args.output_extension,
        args.output_type,
        keypoints,
        scores,
        descriptors
    )