""" | |
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
""" | |
# -*- encoding: utf-8 -*- | |
# @Author: SWHL | |
# @Contact: liekkaskono@163.com | |
import sys | |
import warnings | |
from pathlib import Path | |
import cv2 | |
import numpy as np | |
import pyclipper | |
import six | |
import yaml | |
from onnxruntime import (GraphOptimizationLevel, InferenceSession, | |
SessionOptions, get_available_providers, get_device) | |
from shapely.geometry import Polygon | |
root_dir = Path(__file__).resolve().parent.parent | |
class OrtInferSession(): | |
def __init__(self, config): | |
sess_opt = SessionOptions() | |
sess_opt.log_severity_level = 4 | |
sess_opt.enable_cpu_mem_arena = False | |
sess_opt.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL | |
cuda_ep = 'CUDAExecutionProvider' | |
cpu_ep = 'CPUExecutionProvider' | |
cpu_provider_options = { | |
"arena_extend_strategy": "kSameAsRequested", | |
} | |
EP_list = [] | |
if config['use_cuda'] and get_device() == 'GPU' \ | |
and cuda_ep in get_available_providers(): | |
EP_list = [(cuda_ep, config[cuda_ep])] | |
EP_list.append((cpu_ep, cpu_provider_options)) | |
config['model_path'] = str(root_dir / config['model_path']) | |
self._verify_model(config['model_path']) | |
self.session = InferenceSession(config['model_path'], | |
sess_options=sess_opt, | |
providers=EP_list) | |
if config['use_cuda'] and cuda_ep not in self.session.get_providers(): | |
warnings.warn(f'{cuda_ep} is not avaiable for current env, the inference part is automatically shifted to be executed under {cpu_ep}.\n' | |
'Please ensure the installed onnxruntime-gpu version matches your cuda and cudnn version, ' | |
'you can check their relations from the offical web site: ' | |
'https://onnxruntime.ai/docs/execution-providers/CUDA-ExecutionProvider.html', | |
RuntimeWarning) | |
def get_input_name(self, input_idx=0): | |
return self.session.get_inputs()[input_idx].name | |
def get_output_name(self, output_idx=0): | |
return self.session.get_outputs()[output_idx].name | |
def _verify_model(model_path): | |
model_path = Path(model_path) | |
if not model_path.exists(): | |
raise FileNotFoundError(f'{model_path} does not exists.') | |
if not model_path.is_file(): | |
raise FileExistsError(f'{model_path} is not a file.') | |
def read_yaml(yaml_path): | |
with open(yaml_path, 'rb') as f: | |
data = yaml.load(f, Loader=yaml.Loader) | |
return data | |
class DecodeImage(): | |
""" decode image """ | |
def __init__(self, img_mode='RGB', channel_first=False): | |
self.img_mode = img_mode | |
self.channel_first = channel_first | |
def __call__(self, data): | |
img = data['image'] | |
if six.PY2: | |
assert type(img) is str and len(img) > 0, "invalid input 'img' in DecodeImage" | |
else: | |
assert type(img) is bytes and len(img) > 0, "invalid input 'img' in DecodeImage" | |
img = np.frombuffer(img, dtype='uint8') | |
img = cv2.imdecode(img, 1) | |
if img is None: | |
return None | |
if self.img_mode == 'GRAY': | |
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) | |
elif self.img_mode == 'RGB': | |
assert img.shape[2] == 3, f'invalid shape of image[{img.shape}]' | |
img = img[:, :, ::-1] | |
if self.channel_first: | |
img = img.transpose((2, 0, 1)) | |
data['image'] = img | |
return data | |
class NormalizeImage(): | |
""" normalize image such as substract mean, divide std""" | |
def __init__(self, scale=None, mean=None, std=None, order='chw'): | |
if isinstance(scale, str): | |
scale = eval(scale) | |
self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) | |
mean = mean if mean is not None else [0.485, 0.456, 0.406] | |
std = std if std is not None else [0.229, 0.224, 0.225] | |
shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) | |
self.mean = np.array(mean).reshape(shape).astype('float32') | |
self.std = np.array(std).reshape(shape).astype('float32') | |
def __call__(self, data): | |
img = np.array(data['image']).astype(np.float32) | |
data['image'] = (img * self.scale - self.mean) / self.std | |
return data | |
class ToCHWImage(): | |
""" convert hwc image to chw image""" | |
def __init__(self): | |
pass | |
def __call__(self, data): | |
img = np.array(data['image']) | |
data['image'] = img.transpose((2, 0, 1)) | |
return data | |
class KeepKeys(): | |
def __init__(self, keep_keys): | |
self.keep_keys = keep_keys | |
def __call__(self, data): | |
data_list = [] | |
for key in self.keep_keys: | |
data_list.append(data[key]) | |
return data_list | |
class DetResizeForTest(): | |
def __init__(self, **kwargs): | |
super(DetResizeForTest, self).__init__() | |
self.resize_type = 0 | |
if 'image_shape' in kwargs: | |
self.image_shape = kwargs['image_shape'] | |
self.resize_type = 1 | |
elif 'limit_side_len' in kwargs: | |
self.limit_side_len = kwargs.get('limit_side_len', 736) | |
self.limit_type = kwargs.get('limit_type', 'min') | |
if 'resize_long' in kwargs: | |
self.resize_type = 2 | |
self.resize_long = kwargs.get('resize_long', 960) | |
else: | |
self.limit_side_len = kwargs.get('limit_side_len', 736) | |
self.limit_type = kwargs.get('limit_type', 'min') | |
def __call__(self, data): | |
img = data['image'] | |
src_h, src_w = img.shape[:2] | |
if self.resize_type == 0: | |
# img, shape = self.resize_image_type0(img) | |
img, [ratio_h, ratio_w] = self.resize_image_type0(img) | |
elif self.resize_type == 2: | |
img, [ratio_h, ratio_w] = self.resize_image_type2(img) | |
else: | |
# img, shape = self.resize_image_type1(img) | |
img, [ratio_h, ratio_w] = self.resize_image_type1(img) | |
data['image'] = img | |
data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) | |
return data | |
def resize_image_type1(self, img): | |
resize_h, resize_w = self.image_shape | |
ori_h, ori_w = img.shape[:2] # (h, w, c) | |
ratio_h = float(resize_h) / ori_h | |
ratio_w = float(resize_w) / ori_w | |
img = cv2.resize(img, (int(resize_w), int(resize_h))) | |
# return img, np.array([ori_h, ori_w]) | |
return img, [ratio_h, ratio_w] | |
def resize_image_type0(self, img): | |
""" | |
resize image to a size multiple of 32 which is required by the network | |
args: | |
img(array): array with shape [h, w, c] | |
return(tuple): | |
img, (ratio_h, ratio_w) | |
""" | |
limit_side_len = self.limit_side_len | |
h, w = img.shape[:2] | |
# limit the max side | |
if self.limit_type == 'max': | |
if max(h, w) > limit_side_len: | |
if h > w: | |
ratio = float(limit_side_len) / h | |
else: | |
ratio = float(limit_side_len) / w | |
else: | |
ratio = 1. | |
else: | |
if min(h, w) < limit_side_len: | |
if h < w: | |
ratio = float(limit_side_len) / h | |
else: | |
ratio = float(limit_side_len) / w | |
else: | |
ratio = 1. | |
resize_h = int(h * ratio) | |
resize_w = int(w * ratio) | |
resize_h = int(round(resize_h / 32) * 32) | |
resize_w = int(round(resize_w / 32) * 32) | |
try: | |
if int(resize_w) <= 0 or int(resize_h) <= 0: | |
return None, (None, None) | |
img = cv2.resize(img, (int(resize_w), int(resize_h))) | |
except: | |
print(img.shape, resize_w, resize_h) | |
sys.exit(0) | |
ratio_h = resize_h / float(h) | |
ratio_w = resize_w / float(w) | |
return img, [ratio_h, ratio_w] | |
def resize_image_type2(self, img): | |
h, w = img.shape[:2] | |
resize_w = w | |
resize_h = h | |
# Fix the longer side | |
if resize_h > resize_w: | |
ratio = float(self.resize_long) / resize_h | |
else: | |
ratio = float(self.resize_long) / resize_w | |
resize_h = int(resize_h * ratio) | |
resize_w = int(resize_w * ratio) | |
max_stride = 128 | |
resize_h = (resize_h + max_stride - 1) // max_stride * max_stride | |
resize_w = (resize_w + max_stride - 1) // max_stride * max_stride | |
img = cv2.resize(img, (int(resize_w), int(resize_h))) | |
ratio_h = resize_h / float(h) | |
ratio_w = resize_w / float(w) | |
return img, [ratio_h, ratio_w] | |
def transform(data, ops=None): | |
""" transform """ | |
if ops is None: | |
ops = [] | |
for op in ops: | |
data = op(data) | |
if data is None: | |
return None | |
return data | |
def create_operators(op_param_dict): | |
""" | |
create operators based on the config | |
""" | |
ops = [] | |
for op_name, param in op_param_dict.items(): | |
if param is None: | |
param = {} | |
op = eval(op_name)(**param) | |
ops.append(op) | |
return ops | |
def draw_text_det_res(dt_boxes, img_path): | |
src_im = cv2.imread(img_path) | |
for box in dt_boxes: | |
box = np.array(box).astype(np.int32).reshape(-1, 2) | |
cv2.polylines(src_im, [box], True, | |
color=(255, 255, 0), thickness=2) | |
return src_im | |
class DBPostProcess(): | |
"""The post process for Differentiable Binarization (DB).""" | |
def __init__(self, | |
thresh=0.3, | |
box_thresh=0.7, | |
max_candidates=1000, | |
unclip_ratio=2.0, | |
score_mode="fast", | |
use_dilation=False): | |
self.thresh = thresh | |
self.box_thresh = box_thresh | |
self.max_candidates = max_candidates | |
self.unclip_ratio = unclip_ratio | |
self.min_size = 3 | |
self.score_mode = score_mode | |
if use_dilation: | |
self.dilation_kernel = np.array([[1, 1], [1, 1]]) | |
else: | |
self.dilation_kernel = None | |
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): | |
''' | |
_bitmap: single map with shape (1, H, W), | |
whose values are binarized as {0, 1} | |
''' | |
bitmap = _bitmap | |
height, width = bitmap.shape | |
outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, | |
cv2.CHAIN_APPROX_SIMPLE) | |
if len(outs) == 3: | |
img, contours, _ = outs[0], outs[1], outs[2] | |
elif len(outs) == 2: | |
contours, _ = outs[0], outs[1] | |
num_contours = min(len(contours), self.max_candidates) | |
boxes = [] | |
scores = [] | |
for index in range(num_contours): | |
contour = contours[index] | |
points, sside = self.get_mini_boxes(contour) | |
if sside < self.min_size: | |
continue | |
points = np.array(points) | |
if self.score_mode == "fast": | |
score = self.box_score_fast(pred, points.reshape(-1, 2)) | |
else: | |
score = self.box_score_slow(pred, contour) | |
if self.box_thresh > score: | |
continue | |
box = self.unclip(points).reshape(-1, 1, 2) | |
box, sside = self.get_mini_boxes(box) | |
if sside < self.min_size + 2: | |
continue | |
box = np.array(box) | |
box[:, 0] = np.clip( | |
np.round(box[:, 0] / width * dest_width), 0, dest_width) | |
box[:, 1] = np.clip( | |
np.round(box[:, 1] / height * dest_height), 0, dest_height) | |
boxes.append(box.astype(np.int16)) | |
scores.append(score) | |
return np.array(boxes, dtype=np.int16), scores | |
def unclip(self, box): | |
unclip_ratio = self.unclip_ratio | |
poly = Polygon(box) | |
distance = poly.area * unclip_ratio / poly.length | |
offset = pyclipper.PyclipperOffset() | |
offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) | |
expanded = np.array(offset.Execute(distance)) | |
return expanded | |
def get_mini_boxes(self, contour): | |
bounding_box = cv2.minAreaRect(contour) | |
points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) | |
index_1, index_2, index_3, index_4 = 0, 1, 2, 3 | |
if points[1][1] > points[0][1]: | |
index_1 = 0 | |
index_4 = 1 | |
else: | |
index_1 = 1 | |
index_4 = 0 | |
if points[3][1] > points[2][1]: | |
index_2 = 2 | |
index_3 = 3 | |
else: | |
index_2 = 3 | |
index_3 = 2 | |
box = [ | |
points[index_1], points[index_2], points[index_3], points[index_4] | |
] | |
return box, min(bounding_box[1]) | |
def box_score_fast(self, bitmap, _box): | |
h, w = bitmap.shape[:2] | |
box = _box.copy() | |
xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1) | |
xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1) | |
ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1) | |
ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1) | |
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) | |
box[:, 0] = box[:, 0] - xmin | |
box[:, 1] = box[:, 1] - ymin | |
cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) | |
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] | |
def box_score_slow(self, bitmap, contour): | |
''' | |
box_score_slow: use polyon mean score as the mean score | |
''' | |
h, w = bitmap.shape[:2] | |
contour = contour.copy() | |
contour = np.reshape(contour, (-1, 2)) | |
xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) | |
xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) | |
ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) | |
ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) | |
mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) | |
contour[:, 0] = contour[:, 0] - xmin | |
contour[:, 1] = contour[:, 1] - ymin | |
cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) | |
return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] | |
def __call__(self, pred, shape_list): | |
pred = pred[:, 0, :, :] | |
segmentation = pred > self.thresh | |
boxes_batch = [] | |
for batch_index in range(pred.shape[0]): | |
src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] | |
if self.dilation_kernel is not None: | |
mask = cv2.dilate( | |
np.array(segmentation[batch_index]).astype(np.uint8), | |
self.dilation_kernel) | |
else: | |
mask = segmentation[batch_index] | |
boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, | |
src_w, src_h) | |
boxes_batch.append({'points': boxes}) | |
return boxes_batch | |