Spaces:

truong-xuan-linh
/

vietnamese-ocr

Build error

App Files Files Community

vietnamese-ocr / src /OCR.py

truong-xuan-linh

inmit:

03484ca about 1 year ago

raw

history blame

3.67 kB

	from paddleocr import PaddleOCR
	from vietocr.tool.config import Cfg
	from vietocr.tool.predictor import Predictor

	import cv2
	import requests
	import unidecode
	import numpy as np
	from PIL import Image, ImageFont, ImageDraw

	class OCRDetector:
	    """Two-stage OCR pipeline: PaddleOCR locates text, VietOCR re-reads it.

	    PaddleOCR (configured for English) supplies the text boxes and a first
	    reading of each one. Every box is then cropped and passed to a VietOCR
	    predictor; the VietOCR reading is kept only when it contains characters
	    that ``unidecode`` would change (i.e. diacritics), otherwise the
	    PaddleOCR reading is used.
	    """

	    # Fallback font used by ``visualize_ocr``; ``__init__`` may override it
	    # per instance.
	    font_path = "./storage/Roboto-Black.ttf"

	    def __init__(
	        self,
	        weights_path="./storage/ocr_model.pth",
	        device="cpu",
	        font_path="./storage/Roboto-Black.ttf",
	    ) -> None:
	        """Build the PaddleOCR detector and the VietOCR recogniser.

	        Args:
	            weights_path: Path to the VietOCR ``vgg_transformer`` weights.
	            device: Device string stored in the VietOCR config.
	            font_path: TrueType font used to draw labels in
	                ``visualize_ocr``.
	        """
	        self.paddle_ocr = PaddleOCR(lang='en', use_angle_cls=False)

	        self.config = Cfg.load_config_from_name('vgg_transformer')
	        self.config['weights'] = weights_path
	        self.config['cnn']['pretrained'] = False
	        self.config['device'] = device
	        self.config['predictor']['beamsearch'] = False
	        self.viet_ocr = Predictor(self.config)

	        self.font_path = font_path

	    def find_box(self, image):
	        """Detect text boxes (and PaddleOCR's own reading) in ``image``.

	        Args:
	            image: Image handed straight to ``PaddleOCR.ocr``.

	        Returns:
	            ``(boxes, texts)``: ``boxes`` holds the first element of every
	            detection, ``texts`` holds one ``{"text", "score"}`` dict per
	            box. Both lists are empty when nothing was detected.
	        """
	        result = self.paddle_ocr.ocr(image, cls=False)
	        # Only the first page of the result is used. Some PaddleOCR releases
	        # report "no text found" as ``[None]`` (or an empty list), which
	        # must not be iterated.
	        detections = result[0] if result else None
	        if not detections:
	            return [], []

	        boxes = [detection[0] for detection in detections]
	        texts = [
	            {"text": detection[1][0], "score": detection[1][1]}
	            for detection in detections
	        ]
	        return boxes, texts

	    def vietnamese_text(self, boxes, image):
	        """Read every box of ``image`` with the VietOCR predictor.

	        Args:
	            boxes: Four-point boxes; the points are used as top-left,
	                top-right, bottom-right, bottom-left (presumably PaddleOCR's
	                corner order - confirm against the detector).
	            image: Array indexed as ``image[y1:y2, x1:x2]``.

	        Returns:
	            One ``{"text", "score"}`` dict per box, in the same order as
	            ``boxes``. A box whose crop is empty yields
	            ``{"text": "", "score": 0.0}`` so the result stays aligned.
	        """
	        texts = []
	        for box in boxes:
	            top_left, top_right = box[0], box[1]
	            bottom_right, bottom_left = box[2], box[3]

	            # Vertical padding shrinks as the top edge gets more slanted:
	            # 10 px for a level edge, none once the slant reaches 10 px.
	            pad = max(0, 10 - abs(top_left[1] - top_right[1]))
	            y1 = int(max(0, min(top_left[1], top_right[1]) - pad))
	            y2 = int(max(bottom_right[1], bottom_left[1]) + pad)
	            x1 = int(max(0, min(top_left[0], bottom_left[0])))
	            x2 = int(max(top_right[0], bottom_right[0]))

	            cut_image = image[y1:y2, x1:x2]
	            if cut_image.size == 0:
	                # Degenerate box: nothing to recognise. Keep a placeholder
	                # so indices still match ``boxes``.
	                texts.append({"text": "", "score": 0.0})
	                continue

	            cut_image = Image.fromarray(np.uint8(cut_image))
	            text, score = self.viet_ocr.predict(cut_image, return_prob=True)
	            texts.append({"text": text, "score": score})
	        return texts

	    def _load_image(self, image_path, is_local, timeout):
	        """Return the image at ``image_path`` (file or URL) as an RGB array."""
	        if is_local:
	            with Image.open(image_path) as handle:
	                image = handle.convert("RGB")
	        else:
	            # The context manager releases the connection; the timeout keeps
	            # an unresponsive server from blocking forever.
	            with requests.get(image_path, stream=True, timeout=timeout) as response:
	                # ``.raw`` is only gzip/deflate-decoded when asked to.
	                response.raw.decode_content = True
	                image = Image.open(response.raw).convert("RGB")
	        return np.array(image)

	    def text_detector(self, image_path, is_local=False, timeout=30):
	        """Run detection and recognition on one image.

	        Args:
	            image_path: Local file path when ``is_local`` is true, otherwise
	                a URL fetched with ``requests``.
	            is_local: Selects between opening a file and downloading.
	            timeout: Timeout in seconds passed to ``requests.get``.

	        Returns:
	            ``(image, texts, boxes)`` where ``image`` is the RGB array.
	            ``texts`` and ``boxes`` are both ``None`` when no text was
	            found.
	        """
	        image = self._load_image(image_path, is_local, timeout)

	        boxes, paddle_texts = self.find_box(image)
	        if not boxes:
	            return image, None, None

	        viet_texts = self.vietnamese_text(boxes, image)
	        # Prefer the VietOCR reading only when it carries diacritics (i.e.
	        # stripping accents changes it); otherwise keep PaddleOCR's reading.
	        results_texts = [
	            viet if viet["text"] != unidecode.unidecode(viet["text"]) else paddle
	            for viet, paddle in zip(viet_texts, paddle_texts)
	        ]
	        if not results_texts:
	            return image, None, None
	        return image, results_texts, boxes

	    def visualize_ocr(self, image, texts, boxes):
	        """Draw each box and its recognised text on a copy of ``image``.

	        Args:
	            image: Array to annotate; it is not modified.
	            texts: ``{"text", ...}`` dicts, one per box.
	            boxes: Four-point boxes matching ``texts``.

	        Returns:
	            The annotated copy, or ``image`` itself when ``texts`` is empty
	            or ``None``.
	        """
	        if not texts:
	            return image

	        img = image.copy()
	        fonts = {}  # font size -> loaded font, so each size is read once
	        for box, text in zip(boxes, texts):
	            (x1, y1), _, (x3, y3), _ = box

	            h = y3 - y1
	            # Coordinates may be floats; the font size must stay an int.
	            size = 15 * max(int(h // 1000), 1)
	            font = fonts.get(size)
	            if font is None:
	                font = ImageFont.truetype(self.font_path, size)
	                fonts[size] = font

	            img = cv2.rectangle(
	                img, (int(x1), int(y1)), (int(x3), int(y3)), (0, 255, 0), 1
	            )

	            # Text is drawn through PIL, not cv2, using the TrueType font.
	            img_pil = Image.fromarray(img)
	            draw = ImageDraw.Draw(img_pil)
	            # The label sits one box-height (plus 3 px) above the top edge.
	            draw.text(
	                (int(x1), int(y1 - h - 3)),
	                text["text"],
	                font=font,
	                fill=(51, 51, 255),
	            )
	            img = np.array(img_pil)
	        return img