Spaces:

SWHL
/

RapidOCRDemo

Running

App Files Community

RapidOCRDemo / app.py

SWHL

Update app.py

cabcd6b over 1 year ago

raw

history blame

6.07 kB

	# -*- encoding: utf-8 -*-
	import math
	import random
	import time
	from pathlib import Path

	import cv2
	import gradio as gr
	import numpy as np
	from PIL import Image, ImageDraw, ImageFont
	from rapidocr_onnxruntime import RapidOCR


	def draw_ocr_box_txt(image, boxes, txts, font_path,
	                     scores=None, text_score=0.5):
	    """Render OCR results side by side with the source image.

	    The left half of the result is ``image`` blended 50/50 with a copy on
	    which every kept box is filled with a colour; the right half is a white
	    canvas showing the box outlines and the recognised text.

	    Args:
	        image: PIL image the boxes refer to.
	        boxes: Iterable of four-point polygons. Corner 0 -> 1 is measured
	            as the box width and corner 0 -> 3 as the box height.
	        txts: Text for each box, in the same order as ``boxes``.
	        font_path: Path of the TrueType font used to draw the text.
	        scores: Optional per-box scores, indexed like ``boxes``. When
	            given, boxes scoring below ``text_score`` are not drawn.
	        text_score: Minimum score a box needs in order to be drawn.

	    Returns:
	        ``numpy.ndarray`` built from the combined RGB image, which is twice
	        as wide as ``image`` and equally high.
	    """
	    # The colours below are RGB tuples and the output canvas is RGB, so
	    # normalise other input modes (grayscale, palette, RGBA, ...) up front.
	    if image.mode != 'RGB':
	        image = image.convert('RGB')

	    h, w = image.height, image.width
	    img_left = image.copy()
	    img_right = Image.new('RGB', (w, h), (255, 255, 255))
	    draw_left = ImageDraw.Draw(img_left)
	    draw_right = ImageDraw.Draw(img_right)

	    # A private generator yields the same colour sequence as seeding the
	    # module-level RNG with 0, without clobbering global random state.
	    rng = random.Random(0)

	    # Many boxes share a font size; load each size from disk only once.
	    fonts = {}

	    def load_font(size):
	        if size not in fonts:
	            fonts[size] = ImageFont.truetype(font_path, size,
	                                             encoding="utf-8")
	        return fonts[size]

	    for idx, (box, txt) in enumerate(zip(boxes, txts)):
	        if scores is not None and float(scores[idx]) < text_score:
	            continue

	        color = (rng.randint(0, 255),
	                 rng.randint(0, 255),
	                 rng.randint(0, 255))

	        box = [tuple(v) for v in box]
	        draw_left.polygon(box, fill=color)
	        draw_right.polygon(box, outline=color)

	        box_height = math.hypot(box[0][0] - box[3][0],
	                                box[0][1] - box[3][1])
	        box_width = math.hypot(box[0][0] - box[1][0],
	                               box[0][1] - box[1][1])

	        if box_height > 2 * box_width:
	            # Tall, narrow box: treat as vertical text and stack the
	            # characters one below the other.
	            font = load_font(max(int(box_width * 0.9), 10))
	            cur_y = box[0][1]
	            for c in txt:
	                draw_right.text((box[0][0] + 3, cur_y), c,
	                                fill=(0, 0, 0), font=font)
	                cur_y += _glyph_height(font, c)
	        else:
	            font = load_font(max(int(box_height * 0.8), 10))
	            draw_right.text((box[0][0], box[0][1]), txt,
	                            fill=(0, 0, 0), font=font)

	    img_left = Image.blend(image, img_left, 0.5)
	    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
	    img_show.paste(img_left, (0, 0, w, h))
	    img_show.paste(img_right, (w, 0, w * 2, h))
	    return np.array(img_show)


	def _glyph_height(font, char):
	    """Return how far to move down after drawing ``char`` in ``font``.

	    ``ImageFont.getsize`` was removed in Pillow 10. The bottom edge reported
	    by ``getbbox`` (measured from the drawing origin) is used in its place;
	    ``getsize`` remains as a fallback for Pillow releases without
	    ``getbbox``.
	    """
	    if hasattr(font, 'getbbox'):
	        return font.getbbox(char)[3]
	    return font.getsize(char)[1]


	def visualize(image_path, boxes, txts, scores,
	              font_path="./FZYTK.TTF", text_score=0.5):
	    """Draw the OCR result for ``image_path`` and save it to disk.

	    Args:
	        image_path: Path of the image the OCR result belongs to.
	        boxes: Detected text boxes, passed on to ``draw_ocr_box_txt``.
	        txts: Recognised text for each box.
	        scores: Score for each box.
	        font_path: TrueType font used for drawing the text.
	        text_score: Boxes scoring below this value are not drawn. Defaults
	            to 0.5, the threshold that used to be hard-coded here.

	    Returns:
	        Path (as ``str``) of the image written to ``./inference_results/``.
	    """
	    # The context manager closes the underlying file handle once the
	    # drawing helper has produced its array.
	    with Image.open(image_path) as image:
	        draw_img = draw_ocr_box_txt(image, boxes,
	                                    txts, font_path,
	                                    scores,
	                                    text_score=text_score)

	    draw_img_save = Path("./inference_results/")
	    draw_img_save.mkdir(parents=True, exist_ok=True)

	    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S')
	    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')
	    # draw_img is in RGB order; reverse the channel axis because OpenCV
	    # writes images in BGR order.
	    cv2.imwrite(image_save, draw_img[:, :, ::-1])
	    return image_save


	def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5):
	    """Run OCR on one image and prepare the outputs shown in the UI.

	    Args:
	        img_path: Path of the input image (may be ``None`` when no image
	            was supplied).
	        box_thresh: Forwarded to ``rapid_ocr`` as ``box_thresh``.
	        unclip_ratio: Forwarded to ``rapid_ocr`` as ``unclip_ratio``.
	        text_score: Forwarded to ``rapid_ocr`` as ``text_score`` and also
	            used as the drawing threshold, so the rendered boxes agree with
	            the listed text.

	    Returns:
	        ``(image_path, text)``. On success ``image_path`` is the saved
	        visualisation and ``text`` is a list of ``"<text> <score>"``
	        strings; otherwise the input path and a "no valid text" message.
	    """
	    no_text_msg = '未识别到有效文本'

	    # cv2.imread yields None for unreadable files; treat that, and a
	    # missing path, like an image without any recognised text.
	    img = cv2.imread(img_path) if img_path else None
	    if img is None:
	        return img_path, no_text_msg

	    ocr_result, _ = rapid_ocr(img, box_thresh=box_thresh,
	                              unclip_ratio=unclip_ratio,
	                              text_score=text_score)
	    if not ocr_result:
	        return img_path, no_text_msg

	    # Each result entry is unpacked as (box, text, score).
	    dt_boxes, rec_res, scores = zip(*ocr_result)
	    img_save_path = visualize(img_path, dt_boxes, rec_res, scores,
	                              text_score=text_score)
	    output_text = [f'{one_rec} {float(score):.4f}'
	                   for one_rec, score in zip(rec_res, scores)]
	    return img_save_path, output_text


	# Single OCR engine instance shared by every request.
	rapid_ocr = RapidOCR()
	examples = [['images/1.jpg']]

	# gr.Box is not available in newer Gradio releases; keep using it where it
	# exists and fall back to gr.Group otherwise.
	_Container = getattr(gr, 'Box', gr.Group)

	# NOTE(review): the nesting of the layout below was reconstructed from a
	# whitespace-flattened source -- confirm against the original file.
	with gr.Blocks(title='RapidOCR') as demo:
	    gr.Markdown("""
	<h1><center><a href="https://github.com/RapidAI/RapidOCR" target="_blank">Rapid⚡OCR</a></center></h1>

	### Docs: [Docs](https://rapidocr.rtfd.io/)
	### Parameters docs: [link](https://github.com/RapidAI/RapidOCR/tree/main/python#configyaml%E4%B8%AD%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D)
	- box_thresh: 检测到的框是文本的概率，值越大，框中是文本的概率就越大。存在漏检时，调低该值。取值范围：[0, 1.0]
	- unclip_ratio: 控制文本检测框的大小，值越大，检测框整体越大。在出现框截断文字的情况，调大该值。取值范围：[1.5, 2.0]
	- text_score: 文本识别结果是正确的置信度，值越大，显示出的识别结果更准确。存在漏检时，调低该值。取值范围：[0, 1.0]""")
	    with _Container():
	        # One slider per tunable argument of inference().
	        with gr.Row():
	            box_thresh = gr.Slider(minimum=0, maximum=1.0, value=0.5,
	                                   label='box_thresh', step=0.1,
	                                   interactive=True,
	                                   info='[0, 1.0]')
	            unclip_ratio = gr.Slider(minimum=1.5, maximum=2.0, value=1.6,
	                                     label='unclip_ratio', step=0.1,
	                                     interactive=True,
	                                     info='[1.5, 2.0]')
	            text_score = gr.Slider(minimum=0, maximum=1.0, value=0.5,
	                                   label='text_score', step=0.1,
	                                   interactive=True,
	                                   info='[0, 1.0]')
	        with gr.Row():
	            input_img = gr.Image(type='filepath', label='Input')
	            with gr.Column(scale=2):
	                out_img = gr.Image(type='filepath', label='Output_image')
	                # gr.Textbox replaces the legacy gr.outputs.Textbox; its
	                # default type is already 'text'.
	                out_txt = gr.Textbox(label='Output_text')
	        button = gr.Button('Submit')
	        button.click(fn=inference,
	                     inputs=[input_img, box_thresh, unclip_ratio, text_score],
	                     outputs=[out_img, out_txt])

	    gr.Examples(examples=examples,
	                inputs=[input_img, box_thresh, unclip_ratio, text_score],
	                outputs=[out_img, out_txt], fn=inference)

	# Enable request queuing through queue(); the enable_queue argument of
	# launch() is deprecated.
	demo.queue()
	demo.launch(debug=True)