Spaces:
Running
Running
# -*- encoding: utf-8 -*- | |
import math | |
import random | |
import time | |
from pathlib import Path | |
import cv2 | |
import gradio as gr | |
import numpy as np | |
from PIL import Image, ImageDraw, ImageFont | |
from rapidocr_onnxruntime import RapidOCR | |
def _glyph_height(font, char):
    """Return the vertical advance to apply after drawing ``char``.

    ``ImageFont.getsize`` was deprecated and later removed from Pillow, so
    ``getbbox`` is preferred when available. The bottom edge of the bounding
    box is used (not ``bottom - top``) because that is the height the old
    ``getsize`` call reported.
    """
    if hasattr(font, 'getbbox'):
        return font.getbbox(char)[3]
    # Older Pillow releases without ``getbbox``.
    return font.getsize(char)[1]


def draw_ocr_box_txt(image, boxes, txts, font_path,
                     scores=None, text_score=0.5):
    """Render OCR results as a side-by-side comparison image.

    The left half is the input image blended 50/50 with filled, randomly
    coloured detection polygons. The right half is a white canvas with the
    polygon outlines and the recognised text drawn inside them.

    Args:
        image: PIL image the boxes refer to.
        boxes: Iterable of quadrilaterals; each is indexed as four ``(x, y)``
            points, where point 0 -> point 1 is treated as the box width and
            point 0 -> point 3 as the box height.
        txts: Iterable of recognised strings, parallel to ``boxes``.
        font_path: Path to a TrueType font used for the text.
        scores: Optional sequence of confidences parallel to ``boxes``.
        text_score: Boxes whose score is below this value are not drawn.

    Returns:
        ``numpy.ndarray`` of the RGB composite, twice as wide as ``image``.
    """
    # Work in RGB so the RGB colour tuples used below and the RGB output
    # canvas match the image mode regardless of how the file was stored.
    image = image.convert('RGB')
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # A private generator seeded with 0 gives the same colour sequence as
    # ``random.seed(0)`` without resetting the process-wide RNG.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)

    # Many boxes share a font size; load each size from disk only once.
    font_cache = {}

    def font_for(size):
        if size not in font_cache:
            font_cache[size] = ImageFont.truetype(font_path, size,
                                                  encoding="utf-8")
        return font_cache[size]

    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (rng.randint(0, 255),
                 rng.randint(0, 255),
                 rng.randint(0, 255))
        box = [tuple(v) for v in box]
        draw_left.polygon(box, fill=color)
        draw_right.polygon(box, outline=color)

        origin_x, origin_y = box[0][0], box[0][1]
        box_height = math.hypot(origin_x - box[3][0], origin_y - box[3][1])
        box_width = math.hypot(origin_x - box[1][0], origin_y - box[1][1])

        if box_height > 2 * box_width:
            # Tall, narrow box: treat as vertical text and stack the
            # characters top to bottom.
            font = font_for(max(int(box_width * 0.9), 10))
            cur_y = origin_y
            for c in txt:
                draw_right.text((origin_x + 3, cur_y), c,
                                fill=(0, 0, 0), font=font)
                cur_y += _glyph_height(font, c)
        else:
            font = font_for(max(int(box_height * 0.8), 10))
            draw_right.text((origin_x, origin_y), txt,
                            fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)
def visualize(image_path, boxes, txts, scores,
              font_path="./FZYTK.TTF", text_score=0.5):
    """Draw OCR results for ``image_path`` and save them under
    ``./inference_results/``.

    Args:
        image_path: Path of the image that was recognised.
        boxes: Detected boxes, passed through to ``draw_ocr_box_txt``.
        txts: Recognised strings, parallel to ``boxes``.
        scores: Recognition confidences, parallel to ``boxes``.
        font_path: TrueType font used to render the text.
        text_score: Minimum confidence for a box to be drawn. Defaults to
            0.5, the value that was previously hard-coded here.

    Returns:
        Path (as ``str``) of the written visualisation image.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    # Release the file handle as soon as the drawing is finished.
    with Image.open(image_path) as image:
        draw_img = draw_ocr_box_txt(image, boxes,
                                    txts, font_path,
                                    scores,
                                    text_score=text_score)

    draw_img_save = Path("./inference_results/")
    draw_img_save.mkdir(parents=True, exist_ok=True)

    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')

    # draw_img is RGB; OpenCV writes BGR, hence the reversed channel axis.
    if not cv2.imwrite(image_save, draw_img[:, :, ::-1]):
        raise OSError(f'Failed to write visualisation to {image_save}')
    return image_save


def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5):
    """Run OCR on ``img_path`` and return what the UI displays.

    Args:
        img_path: Path of the image to recognise (may be ``None`` when
            nothing was uploaded).
        box_thresh: Forwarded to the OCR engine as ``box_thresh``.
        unclip_ratio: Forwarded to the OCR engine as ``unclip_ratio``.
        text_score: Forwarded to the OCR engine and also used as the drawing
            threshold, so the image and the text list show the same results.

    Returns:
        ``(image_path, text)``. On success ``image_path`` is the saved
        visualisation and ``text`` is a list of ``"<text> <score>"`` strings.
        When nothing could be recognised ``text`` is a fixed message.
    """
    # User-facing message: "No valid text recognised".
    no_text_msg = '未识别到有效文本'

    # cv2.imread yields None for unreadable files; also guard against a
    # missing path so the OCR engine is never called without an image.
    img = cv2.imread(img_path) if img_path else None
    if img is None:
        return None, no_text_msg

    ocr_result, _ = rapid_ocr(img, box_thresh=box_thresh,
                              unclip_ratio=unclip_ratio,
                              text_score=text_score)
    if not ocr_result:
        return img_path, no_text_msg

    dt_boxes, rec_res, scores = zip(*ocr_result)
    img_save_path = visualize(img_path, dt_boxes, rec_res, scores,
                              text_score=text_score)
    output_text = [f'{one_rec} {float(score):.4f}'
                   for one_rec, score in zip(rec_res, scores)]
    return img_save_path, output_text
# Single OCR engine instance shared by every request handled by `inference`.
rapid_ocr = RapidOCR()

# Sample inputs offered below the form.
examples = [['images/1.jpg']]

with gr.Blocks(title='RapidOCR') as demo:
    gr.Markdown("""
<h1><center><a href="https://github.com/RapidAI/RapidOCR" target="_blank">Rapid⚡OCR</a></center></h1>
### Docs: [Docs](https://rapidocr.rtfd.io/)
### Parameters docs: [link](https://github.com/RapidAI/RapidOCR/tree/main/python#configyaml%E4%B8%AD%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D)
- **box_thresh**: 检测到的框是文本的概率,值越大,框中是文本的概率就越大。存在漏检时,调低该值。取值范围:[0, 1.0]
- **unclip_ratio**: 控制文本检测框的大小,值越大,检测框整体越大。在出现框截断文字的情况,调大该值。取值范围:[1.5, 2.0]
- **text_score**: 文本识别结果是正确的置信度,值越大,显示出的识别结果更准确。存在漏检时,调低该值。取值范围:[0, 1.0]""")

    # Tunable OCR parameters; each slider maps onto the keyword argument of
    # the same name in `inference`.
    # NOTE(review): `gr.Box` is kept to preserve the current layout; newer
    # Gradio major versions may no longer provide it - confirm against the
    # pinned Gradio version before upgrading.
    with gr.Box():
        with gr.Row():
            box_thresh = gr.Slider(minimum=0, maximum=1.0, value=0.5,
                                   label='box_thresh', step=0.1,
                                   interactive=True,
                                   info='[0, 1.0]')
            unclip_ratio = gr.Slider(minimum=1.5, maximum=2.0, value=1.6,
                                     label='unclip_ratio', step=0.1,
                                     interactive=True,
                                     info='[1.5, 2.0]')
            text_score = gr.Slider(minimum=0, maximum=1.0, value=0.5,
                                   label='text_score', step=0.1,
                                   interactive=True,
                                   info='[0, 1.0]')

    with gr.Row():
        input_img = gr.Image(type='filepath', label='Input')
        with gr.Column(scale=2):
            out_img = gr.Image(type='filepath', label='Output_image')
            # `gr.Textbox` replaces the deprecated `gr.outputs.Textbox`.
            out_txt = gr.Textbox(label='Output_text')

    button = gr.Button('Submit')
    button.click(fn=inference,
                 inputs=[input_img, box_thresh, unclip_ratio, text_score],
                 outputs=[out_img, out_txt])

    gr.Examples(examples=examples,
                inputs=[input_img, box_thresh, unclip_ratio, text_score],
                outputs=[out_img, out_txt], fn=inference)

# `queue()` replaces the deprecated `enable_queue=True` launch argument.
demo.queue()
demo.launch(debug=True)