import logging
import math
import os
import time
from copy import deepcopy

import cv2
import torch
import torch.nn.functional as F
from torchvision.transforms import ToTensor

from alnet import ALNet
from soft_detect import DKD

configs = {
    'alike-t': {'c1': 8, 'c2': 16, 'c3': 32, 'c4': 64, 'dim': 64, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-t.pth')},
    'alike-s': {'c1': 8, 'c2': 16, 'c3': 48, 'c4': 96, 'dim': 96, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-s.pth')},
    'alike-n': {'c1': 16, 'c2': 32, 'c3': 64, 'c4': 128, 'dim': 128, 'single_head': True, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-n.pth')},
    'alike-l': {'c1': 32, 'c2': 64, 'c3': 128, 'c4': 128, 'dim': 128, 'single_head': False, 'radius': 2,
                'model_path': os.path.join(os.path.split(__file__)[0], 'models', 'alike-l.pth')},
}
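
# Each preset above unpacks directly into the ALike constructor defined
# below, e.g. (a sketch): ALike(**configs['alike-t'], device='cpu').
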
class ALike(ALNet):
    def __init__(self,
                 # ================================== feature encoder
                 c1: int = 32, c2: int = 64, c3: int = 128, c4: int = 128, dim: int = 128,
                 single_head: bool = False,
                 # ================================== detection parameters
                 radius: int = 2,
                 top_k: int = 500, scores_th: float = 0.5,
                 n_limit: int = 5000,
                 device: str = 'cpu',
                 model_path: str = ''
                 ):
        super().__init__(c1, c2, c3, c4, dim, single_head)
        self.radius = radius
        self.top_k = top_k
        self.n_limit = n_limit
        self.scores_th = scores_th
        self.dkd = DKD(radius=self.radius, top_k=self.top_k,
                       scores_th=self.scores_th, n_limit=self.n_limit)
        self.device = device

        if model_path != '':
            state_dict = torch.load(model_path, map_location=self.device)
            self.load_state_dict(state_dict)
            self.to(self.device)
            self.eval()
            logging.info(f'Loaded model parameters from {model_path}')
            logging.info(
                f"Number of model parameters: {sum(p.numel() for p in self.parameters() if p.requires_grad) / 1e3:.1f}K")
    def extract_dense_map(self, image, ret_dict=False):
        """
        Extract the dense score and descriptor maps for a batch of images.
        :param image: BxCxHxW tensor
        :param ret_dict: return a dict instead of a tuple
        :return: descriptor_map (B x dim x H x W, L2-normalized per pixel)
                 and scores_map (B x 1 x H x W)
        """
        # ====================================================
        # The encoder downsamples by 2^5, so H and W must be integer
        # multiples of 32; if they are not, zero-pad up to the next
        # multiple and crop the outputs back afterwards.
        device = image.device
        b, c, h, w = image.shape
        h_ = math.ceil(h / 32) * 32
        w_ = math.ceil(w / 32) * 32
        if h_ != h:
            h_padding = torch.zeros(b, c, h_ - h, w, device=device)
            image = torch.cat([image, h_padding], dim=2)
        if w_ != w:
            w_padding = torch.zeros(b, c, h_, w_ - w, device=device)
            image = torch.cat([image, w_padding], dim=3)
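        # Worked example: a 640x480 input is already a multiple of 32 in both
        # dimensions, so no padding is added; a 500-row image would gain 12
        # zero rows (ceil(500 / 32) * 32 = 512) and be cropped back below.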
        # ====================================================
        scores_map, descriptor_map = super().forward(image)

        # ====================================================
        # crop the padded region back to the input resolution
        if h_ != h or w_ != w:
            descriptor_map = descriptor_map[:, :, :h, :w]
            scores_map = scores_map[:, :, :h, :w]  # Bx1xHxW

        # ====================================================
        # BxCxHxW, L2-normalize each per-pixel descriptor
        descriptor_map = F.normalize(descriptor_map, p=2, dim=1)

        if ret_dict:
            return {'descriptor_map': descriptor_map, 'scores_map': scores_map}
        else:
            return descriptor_map, scores_map
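
    # Shape sketch (illustrative; assumes a `model` built from the 'alike-t'
    # preset, where dim=64):
    #
    #   x = torch.rand(1, 3, 480, 640, device=model.device)
    #   desc_map, score_map = model.extract_dense_map(x)
    #   # desc_map: 1x64x480x640 (unit-norm along dim 1), score_map: 1x1x480x640
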
    def forward(self, img, image_size_max=99999, sort=False, sub_pixel=False):
        """
        :param img: np.array HxWx3, RGB
        :param image_size_max: maximum size of the longer image side;
               larger images are resized down before inference
        :param sort: sort keypoints by scores in descending order
        :param sub_pixel: whether to refine keypoints to sub-pixel accuracy
        :return: a dictionary with 'keypoints', 'descriptors', 'scores',
                 'scores_map', and 'time'
        """
        H, W, three = img.shape
        assert three == 3, "input image shape should be [HxWx3]"

        # ==================== image size constraint
        image = deepcopy(img)
        max_hw = max(H, W)
        if max_hw > image_size_max:
            ratio = float(image_size_max / max_hw)
            image = cv2.resize(image, dsize=None, fx=ratio, fy=ratio)

        # ==================== convert image to tensor
        image = torch.from_numpy(image).to(self.device).to(torch.float32).permute(2, 0, 1)[None] / 255.0

        # ==================== extract keypoints
        start = time.time()

        with torch.no_grad():
            descriptor_map, scores_map = self.extract_dense_map(image)
            keypoints, descriptors, scores, _ = self.dkd(scores_map, descriptor_map,
                                                         sub_pixel=sub_pixel)
            keypoints, descriptors, scores = keypoints[0], descriptors[0], scores[0]
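            # DKD returns keypoints in normalized coordinates in [-1, 1];
            # map them back to pixel coordinates of the original (pre-resize)
            # image so callers never see the internal resizing.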
            keypoints = (keypoints + 1) / 2 * keypoints.new_tensor([[W - 1, H - 1]])

            if sort:
                indices = torch.argsort(scores, descending=True)
                keypoints = keypoints[indices]
                descriptors = descriptors[indices]
                scores = scores[indices]

        end = time.time()

        return {'keypoints': keypoints.cpu().numpy(),
                'descriptors': descriptors.cpu().numpy(),
                'scores': scores.cpu().numpy(),
                'scores_map': scores_map.cpu().numpy(),
                'time': end - start, }
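
# Example usage (a minimal sketch; the image path is hypothetical):
#
#   model = ALike(**configs['alike-n'],
#                 device='cuda' if torch.cuda.is_available() else 'cpu')
#   img_rgb = cv2.cvtColor(cv2.imread('assets/demo.png'), cv2.COLOR_BGR2RGB)
#   pred = model(img_rgb)
#   # pred['keypoints']: Nx2, pred['descriptors']: Nxdim, pred['scores']: N
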
if __name__ == '__main__':
    import numpy as np
    from thop import profile

    net = ALike(c1=32, c2=64, c3=128, c4=128, dim=128, single_head=False)

    image = np.random.random((640, 480, 3)).astype(np.float32)
    flops, params = profile(net, inputs=(image, 9999, False), verbose=False)
    print('{:<30} {:<8} GFLOPs'.format('Computational complexity: ', flops / 1e9))
    print('{:<30} {:<8} K'.format('Number of parameters: ', params / 1e3))