import io
import os

import boto3
import numpy as np
import pytesseract
from dotenv import load_dotenv
from paddleocr import PaddleOCR
from PIL import ImageFilter


def textract_ocr(image, box):
    """Extract text from a rectangular region of ``image`` with AWS Textract.

    The region is cropped, converted to greyscale, PNG-encoded in memory and
    sent to Textract's ``detect_document_text`` endpoint in ``eu-west-3``.
    Credentials are read from the ``aws_access_key_id`` and
    ``aws_secret_access_key`` environment variables, after ``load_dotenv()``
    has had the chance to populate them from a ``.env`` file.

    Args:
        image: Image object exposing ``crop``/``convert``/``save`` -
            presumably a ``PIL.Image.Image``; confirm against callers.
        box: ``(x1, y1, x2, y2)`` crop rectangle passed to ``image.crop``.

    Returns:
        The text of every ``LINE`` block in the response, in response order,
        each followed by a newline. Empty string when there are no lines.
    """
    load_dotenv()

    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    # Textract takes the document as raw bytes, so encode the crop in memory.
    buffer = io.BytesIO()
    region.save(buffer, format='PNG')
    payload = buffer.getvalue()

    client = boto3.client(
        'textract',
        region_name='eu-west-3',
        aws_access_key_id=os.getenv("aws_access_key_id"),
        aws_secret_access_key=os.getenv('aws_secret_access_key'),
    )
    response = client.detect_document_text(Document={'Bytes': payload})

    # Only LINE blocks are kept; other block types in the response are
    # ignored. Each line keeps its trailing newline.
    return "".join(
        f"{block['Text']}\n"
        for block in response['Blocks']
        if block['BlockType'] == 'LINE'
    )


def paddle_ocr(image, box):
    """Extract text from a rectangular region of ``image`` with PaddleOCR.

    Args:
        image: Image object exposing ``crop`` whose result ``numpy.array``
            can convert - presumably a ``PIL.Image.Image``; confirm against
            callers.
        box: ``(x1, y1, x2, y2)`` crop rectangle passed to ``image.crop``.

    Returns:
        The recognised lines joined with newlines, in the order PaddleOCR
        reports them, or an empty string when nothing was detected.
    """
    x1, y1, x2, y2 = box
    region = np.array(image.crop((x1, y1, x2, y2)))

    ocr = PaddleOCR(use_angle_cls=False, lang='latin')
    result = ocr.ocr(region, cls=False)

    # Only one image is submitted, so only the first entry of the result is
    # read. It is None (or the result itself is empty) when no text is found.
    if not result or result[0] is None:
        return ""

    # Each detection is indexed as entry[1][0] for its text. The engine's own
    # line order is kept: sorting the outer list would not reorder the lines
    # and would fail on an empty detection list.
    return "\n".join(entry[1][0] for entry in result[0])


def tesseract_ocr(image, box):
    """Extract text from a rectangular region of ``image`` with Tesseract.

    The region is cropped, converted to greyscale, denoised with a median
    filter and binarised at a fixed threshold before being passed to
    ``pytesseract.image_to_string`` with ``--psm 6``.

    Args:
        image: Image object exposing ``crop``/``convert``/``filter``/``point``
            - presumably a ``PIL.Image.Image``; confirm against callers.
        box: ``(x1, y1, x2, y2)`` crop rectangle passed to ``image.crop``.

    Returns:
        The string returned by ``pytesseract.image_to_string``.
    """
    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    # NOTE(review): no DPI-based rescaling is applied; the region is OCR'd at
    # its native resolution.
    denoised = region.filter(ImageFilter.MedianFilter())

    # Hard threshold: pixels brighter than 180 become white, the rest black.
    binarized = denoised.point(lambda p: 255 if p > 180 else 0)

    return pytesseract.image_to_string(binarized, config="--psm 6")