Spaces:
Build error
Build error
from paddleocr import PaddleOCR | |
from vietocr.tool.config import Cfg | |
from vietocr.tool.predictor import Predictor | |
import cv2 | |
import requests | |
import unidecode | |
import numpy as np | |
from PIL import Image, ImageFont, ImageDraw | |
class OCRDetector:
    """Two-stage OCR: PaddleOCR finds text boxes, VietOCR re-reads each box.

    PaddleOCR is configured for English, so its transcription loses
    Vietnamese diacritics. Each detected box is therefore cropped and read
    again with VietOCR; the VietOCR reading is kept only when it actually
    contains non-ASCII characters, otherwise the PaddleOCR reading is used.
    """

    # Seconds of socket inactivity tolerated while downloading a remote image.
    REQUEST_TIMEOUT = 30
    # Font used to render recognised text in `visualize_ocr`.
    FONT_PATH = "./storage/Roboto-Black.ttf"
    # Base font size in pixels, multiplied by a per-box scale factor.
    BASE_FONT_SIZE = 15

    def __init__(self) -> None:
        """Load the PaddleOCR detector and the VietOCR recogniser (CPU only)."""
        self.paddle_ocr = PaddleOCR(lang='en', use_angle_cls=False)

        self.config = Cfg.load_config_from_name('vgg_transformer')
        self.config['weights'] = "./storage/ocr_model.pth"
        self.config['cnn']['pretrained'] = False
        self.config['device'] = "cpu"
        self.config['predictor']['beamsearch'] = False
        self.viet_ocr = Predictor(self.config)

    def find_box(self, image):
        """Detect text boxes in `image` with the PaddleOCR model.

        Returns:
            A pair ``(boxes, texts)`` of equal-length lists. Each box is the
            first element of a PaddleOCR result entry; each text is a dict
            ``{"text": ..., "score": ...}`` built from the second element.
            Both lists are empty when nothing was detected.
        """
        result = self.paddle_ocr.ocr(image, cls=False)
        # PaddleOCR reports an image without any detected text as None rather
        # than as an empty list, so guard before iterating.
        if not result or result[0] is None:
            return [], []
        detections = result[0]

        boxes = [res[0] for res in detections]
        texts = [{"text": res[1][0], "score": res[1][1]} for res in detections]
        return boxes, texts

    def vietnamese_text(self, boxes, image):
        """Recognise the text inside each box with the VietOCR model.

        Args:
            boxes: Quadrilaterals of four ``(x, y)`` points. The first two
                points are treated as the top edge and the last two as the
                bottom edge.
            image: Array indexed as ``image[y, x]`` (it is sliced by rows,
                then columns).

        Returns:
            One ``{"text": ..., "score": ...}`` dict per box, in the same
            order as `boxes`. A box whose crop is empty yields an empty text
            with score 0.0 so the result stays index-aligned with `boxes`.
        """
        texts = []
        for box in boxes:
            top_left, top_right, bottom_right, bottom_left = (
                box[0], box[1], box[2], box[3]
            )

            # Pad the crop vertically by up to 10 px; the padding shrinks as
            # the top edge gets more slanted.
            pad = max(0, 10 - abs(top_left[1] - top_right[1]))
            y1 = int(max(0, min(top_left[1], top_right[1]) - pad))
            y2 = int(max(bottom_right[1], bottom_left[1]) + pad)
            x1 = int(max(0, min(top_left[0], bottom_left[0])))
            x2 = int(max(top_right[0], bottom_right[0]))

            crop = image[y1:y2, x1:x2]
            if crop.size == 0:
                # Degenerate box: nothing to recognise. An empty text makes
                # the caller fall back to the PaddleOCR reading.
                texts.append({"text": "", "score": 0.0})
                continue

            crop_image = Image.fromarray(np.uint8(crop))
            text, score = self.viet_ocr.predict(crop_image, return_prob=True)
            texts.append({"text": text, "score": score})

        return texts

    def text_detector(self, image_path, is_local=False):
        """Run the full pipeline on an image file or an image URL.

        Args:
            image_path: Filesystem path when `is_local` is true, otherwise a
                URL that is downloaded with `requests`.
            is_local: Selects how `image_path` is interpreted.

        Returns:
            ``(image, texts, boxes)`` where `image` is the RGB image as a
            numpy array. `texts` and `boxes` are both ``None`` when no text
            box was detected.
        """
        if is_local:
            image = Image.open(image_path).convert("RGB")
        else:
            # The context manager releases the connection; convert() forces
            # the image data to be read before the response is closed.
            with requests.get(
                image_path, stream=True, timeout=self.REQUEST_TIMEOUT
            ) as response:
                image = Image.open(response.raw).convert("RGB")

        image = np.array(image)

        boxes, paddle_texts = self.find_box(image)
        if not boxes:
            return image, None, None

        viet_texts = self.vietnamese_text(boxes, image)

        # Keep the VietOCR reading only if it contains characters that
        # unidecode would change (i.e. non-ASCII ones such as diacritics).
        results_texts = [
            viet if viet["text"] != unidecode.unidecode(viet["text"]) else paddle
            for viet, paddle in zip(viet_texts, paddle_texts)
        ]
        return image, results_texts, boxes

    def visualize_ocr(self, image, texts, boxes):
        """Draw each box and its recognised text onto a copy of `image`.

        Returns `image` itself, unmodified, when `texts` is empty or None;
        otherwise returns a new annotated array.
        """
        if not texts:
            return image

        img = image.copy()
        fonts = {}  # font size -> loaded font, so the file is read once per size
        for box, text in zip(boxes, texts):
            (x1, y1), _, (x3, y3), _ = box
            h = y3 - y1

            # Coordinates may be floats, which would make `h // 1000` a float;
            # cast so the font size is always an int.
            scale = max(int(h // 1000), 1)
            font_size = self.BASE_FONT_SIZE * scale
            font = fonts.get(font_size)
            if font is None:
                font = ImageFont.truetype(self.FONT_PATH, font_size)
                fonts[font_size] = font

            img = cv2.rectangle(
                img, (int(x1), int(y1)), (int(x3), int(y3)), (0, 255, 0), 1
            )

            # Text is rendered through PIL rather than cv2 so that a TrueType
            # font can be used.
            img_pil = Image.fromarray(img)
            draw = ImageDraw.Draw(img_pil)
            draw.text(
                (int(x1), int(y1 - h - 3)),
                text["text"],
                font=font,
                fill=(51, 51, 255),
            )
            img = np.array(img_pil)

        return img