import io
import os

import boto3
import numpy as np
import pytesseract
from dotenv import load_dotenv
from PIL import ImageFilter


def textract_ocr(image, box):
    """Run AWS Textract OCR on a rectangular region of an image.

    The region is cropped out of ``image``, converted to mode ``"L"``,
    encoded as PNG in memory and sent to Textract's
    ``detect_document_text`` operation (region ``eu-west-3``).

    Credentials are read from the ``aws_access_key_id`` and
    ``aws_secret_access_key`` environment variables after ``load_dotenv()``
    has been called, so they may be supplied through a ``.env`` file.

    Args:
        image: Object exposing ``crop`` / ``convert`` / ``save`` --
            presumably a ``PIL.Image.Image``; confirm against callers.
        box: Four values ``(x1, y1, x2, y2)`` passed straight to
            ``image.crop`` as the crop rectangle.

    Returns:
        The text of every ``LINE`` block in the Textract response, in
        response order, each followed by a newline. Empty string when the
        response contains no ``LINE`` blocks.

    Raises:
        Nothing is caught here: errors from cropping, encoding or the
        Textract call propagate to the caller.
    """
    load_dotenv()

    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    # Textract takes raw image bytes, so encode the crop as PNG in memory.
    buffer = io.BytesIO()
    region.save(buffer, format='PNG')
    img_bytes = buffer.getvalue()

    client = boto3.client(
        'textract',
        region_name='eu-west-3',
        aws_access_key_id=os.getenv("aws_access_key_id"),
        aws_secret_access_key=os.getenv('aws_secret_access_key'),
    )
    response = client.detect_document_text(Document={'Bytes': img_bytes})

    # Only LINE blocks contribute to the output; each block also carries a
    # 'Confidence' value, which this function does not report.
    return "".join(
        block['Text'] + "\n"
        for block in response['Blocks']
        if block['BlockType'] == 'LINE'
    )