LivePortrait / src /utils /landmark_runner.py
cleardusk's picture
Run insightface on CPU and adapt the code to make it work with ZeroGPU (#1)
add5fb2 verified
# coding: utf-8
import os.path as osp
import cv2; cv2.setNumThreads(0); cv2.ocl.setUseOpenCL(False)
import torch
import numpy as np
import onnxruntime
from .timer import Timer
from .rprint import rlog
from .crop import crop_image, _transform_pts
def make_abs_path(fn):
    """Return *fn* joined onto the directory that contains this module.

    The module location is resolved with ``realpath``, so symlinks to this
    file are followed before the directory is taken.
    """
    module_dir = osp.dirname(osp.realpath(__file__))
    return osp.join(module_dir, fn)
def to_ndarray(obj):
    """Convert *obj* to a ``numpy.ndarray``.

    Args:
        obj: A ``torch.Tensor``, a ``numpy.ndarray``, or anything accepted by
            ``numpy.array`` (e.g. a nested list).

    Returns:
        The input itself when it is already an ndarray; otherwise a NumPy
        array built from it. Tensors are detached from the autograd graph and
        moved to the CPU first.
    """
    if isinstance(obj, torch.Tensor):
        # ``Tensor.numpy()`` raises for tensors that require grad, so detach
        # before converting.
        return obj.detach().cpu().numpy()
    if isinstance(obj, np.ndarray):
        return obj
    return np.array(obj)
class LandmarkRunner(object):
    """Run an ONNX landmark model on an image and map the points back to it.

    Keyword Args:
        ckpt_path: Path of the ONNX model handed to ``onnxruntime``.
        device_id: CUDA device index, only consumed by the CUDA branch
            (default 0).
        dsize: Side length in pixels of the square model input (default 224).

    Note:
        The execution provider is hard-coded to CPU so the runner works with
        ZeroGPU; an ``onnx_provider`` keyword argument is currently ignored.
    """

    def __init__(self, **kwargs):
        ckpt_path = kwargs.get('ckpt_path')
        # Forced to CPU for ZeroGPU compatibility. The original behaviour was
        # kwargs.get('onnx_provider', 'cuda'), i.e. CUDA by default.
        onnx_provider = 'cpu'
        device_id = kwargs.get('device_id', 0)
        self.dsize = kwargs.get('dsize', 224)
        self.timer = Timer()

        if onnx_provider.lower() == 'cuda':
            # Unreachable while the provider is pinned to 'cpu' above; kept so
            # that re-enabling CUDA is a one-line change.
            self.session = onnxruntime.InferenceSession(
                ckpt_path, providers=[
                    ('CUDAExecutionProvider', {'device_id': device_id})
                ]
            )
        else:
            opts = onnxruntime.SessionOptions()
            opts.intra_op_num_threads = 4  # default number of intra-op threads
            self.session = onnxruntime.InferenceSession(
                ckpt_path, providers=['CPUExecutionProvider'],
                sess_options=opts
            )

    def _run(self, inp):
        """Feed *inp* to the model's ``'input'`` tensor and return all outputs."""
        out = self.session.run(None, {'input': inp})
        return out

    def run(self, img_rgb: np.ndarray, lmk=None):
        """Predict landmarks for *img_rgb* in the coordinates of *img_rgb*.

        Args:
            img_rgb: H x W x C image array (RGB, going by the parameter name).
            lmk: Optional landmarks used by ``crop_image`` to crop the face
                region first. When ``None`` the whole image is resized to
                ``dsize x dsize`` instead.

        Returns:
            A dict with key ``'pts'``: an N x 2 array of landmark coordinates
            mapped back into the original image.
        """
        if lmk is not None:
            crop_dct = crop_image(img_rgb, lmk, dsize=self.dsize, scale=1.5, vy_ratio=-0.1)
            img_crop_rgb = crop_dct['img_crop']
        else:
            # No landmarks available: stretch the whole image to the model
            # input size. cv2.resize takes (width, height) and does not keep
            # the aspect ratio, so the crop-to-original mapping needs one
            # scale factor per axis (a single max(h, w) factor is only right
            # for square images).
            img_crop_rgb = cv2.resize(img_rgb, (self.dsize, self.dsize))
            height, width = img_rgb.shape[:2]
            crop_dct = {
                'M_c2o': np.array([
                    [width / self.dsize, 0., 0.],
                    [0., height / self.dsize, 0.],
                    [0., 0., 1.],
                ], dtype=np.float32),
            }

        # HxWxC uint8-style range -> 1xCxHxW float32 in [0, 1]. The channel
        # order is left untouched (no BGR/RGB swap happens here).
        inp = (img_crop_rgb.astype(np.float32) / 255.).transpose(2, 0, 1)[None, ...]

        out_lst = self._run(inp)
        out_pts = out_lst[2]  # the third model output is used as the landmarks

        # Multiply by dsize to get crop pixel coordinates (presumably the
        # model emits coordinates normalised to 0..1 -- confirm against model).
        pts = to_ndarray(out_pts[0]).reshape(-1, 2) * self.dsize
        # Map from crop space back to the original image.
        pts = _transform_pts(pts, M=crop_dct['M_c2o'])

        return {
            'pts': pts,  # 2D landmarks (203 points per the original comment)
        }

    def warmup(self):
        """Run one inference on an all-zero input and log the elapsed time."""
        # Build a dummy image for the warmup pass.
        self.timer.tic()

        dummy_image = np.zeros((1, 3, self.dsize, self.dsize), dtype=np.float32)

        _ = self._run(dummy_image)

        elapse = self.timer.toc()
        rlog(f'LandmarkRunner warmup time: {elapse:.3f}s')