Spaces:
Running
Running
File size: 974 Bytes
c7d6bcb 94b2105 c7d6bcb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
from dotenv import load_dotenv
import io
import boto3
import os
import pytesseract
from PIL import ImageFilter
import numpy as np
def textract_ocr(image, box, region_name="eu-west-3"):
    """Run Amazon Textract OCR on a rectangular region of an image.

    The region is cropped out of ``image``, converted to mode ``"L"``
    (single-channel grayscale), PNG-encoded in memory and submitted to
    Textract's ``detect_document_text`` call as raw bytes.

    AWS credentials are read from the environment variables
    ``aws_access_key_id`` and ``aws_secret_access_key`` after
    ``load_dotenv()`` has run. NOTE(review): when either variable is unset,
    ``None`` is passed to ``boto3.client``; presumably boto3 then falls back
    to its default credential lookup -- confirm for the deployment.

    Args:
        image: Object exposing PIL-style ``crop``, ``convert`` and ``save``
            methods (presumably a ``PIL.Image.Image`` -- confirm with callers).
        box: Four values ``(x1, y1, x2, y2)`` forwarded to ``image.crop`` in
            that order.
        region_name: AWS region used for the Textract client. Defaults to
            ``"eu-west-3"``, the value that used to be hard-coded here.

    Returns:
        str: The ``Text`` of every block whose ``BlockType`` is ``"LINE"``,
        in the order the response lists them, each followed by a newline
        character. Empty string when the response has no such block.
    """
    # Make variables from a local .env file visible to os.getenv below.
    load_dotenv()

    x1, y1, x2, y2 = box
    region = image.crop((x1, y1, x2, y2)).convert("L")

    # Textract is given the image as bytes, so encode the crop in memory
    # instead of writing a temporary file.
    buffer = io.BytesIO()
    region.save(buffer, format="PNG")

    client = boto3.client(
        "textract",
        region_name=region_name,
        aws_access_key_id=os.getenv("aws_access_key_id"),
        aws_secret_access_key=os.getenv("aws_secret_access_key"),
    )
    response = client.detect_document_text(Document={"Bytes": buffer.getvalue()})

    # Only LINE blocks contribute to the output; every line keeps its own
    # trailing newline so the result matches the previous string format.
    return "".join(
        block["Text"] + "\n"
        for block in response["Blocks"]
        if block["BlockType"] == "LINE"
    )
|