Spaces:
Running
Running
from dotenv import load_dotenv | |
import io | |
import boto3 | |
import os | |
import pytesseract | |
from PIL import ImageFilter | |
import numpy as np | |
def textract_ocr(image, box):
    """Run Amazon Textract OCR on a rectangular region of an image.

    The region is cropped out of ``image``, converted to 8-bit grayscale,
    PNG-encoded in memory and sent to Textract's ``detect_document_text``
    endpoint in the ``eu-west-3`` region.

    Args:
        image: Object exposing PIL-style ``crop``/``convert``/``save``
            methods (presumably a ``PIL.Image.Image`` -- confirm with callers).
        box: Four values ``(x1, y1, x2, y2)`` forwarded to ``image.crop``.

    Returns:
        The text of every ``LINE`` block in the response, in response order,
        each followed by a newline. An empty string when no line is detected.
    """
    # Populate the environment from a .env file so the key lookups below
    # can find the credentials.
    load_dotenv()

    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    # Encode in memory: the request takes raw image bytes, not a file path.
    buffer = io.BytesIO()
    region.save(buffer, format='PNG')

    client = boto3.client(
        'textract',
        region_name='eu-west-3',
        aws_access_key_id=os.getenv("aws_access_key_id"),
        aws_secret_access_key=os.getenv('aws_secret_access_key'),
    )
    response = client.detect_document_text(Document={'Bytes': buffer.getvalue()})

    # Only LINE blocks are kept; other block types in the response are
    # skipped. Every line keeps its trailing newline, as before.
    return "".join(
        block['Text'] + "\n"
        for block in response['Blocks']
        if block['BlockType'] == 'LINE'
    )