import copy | |
from src.utils import run_example, clean_text, draw_ocr_bboxes | |
def ocr_task(image):
    """
    Run plain OCR and region-aware OCR on an image.

    The image is sent through ``run_example`` twice: once with the
    ``<OCR>`` prompt to obtain the text, and once with the
    ``<OCR_WITH_REGION>`` prompt to obtain the regions to draw.

    Args:
        image (PIL.Image.Image): The input image to perform OCR on.

    Returns:
        tuple: ``(annotated_image, text)`` where ``annotated_image`` is the
            result of ``draw_ocr_bboxes`` applied to a deep copy of
            ``image`` and ``text`` is the ``<OCR>`` output passed through
            ``clean_text``.
    """
    text_task = "<OCR>"
    region_task = "<OCR_WITH_REGION>"

    # Plain-text pass: the result is keyed by the task prompt.
    text_response = run_example(text_task, image)
    text = clean_text(text_response[text_task])

    # Region pass: also keyed by its task prompt.
    region_response = run_example(region_task, image)

    # The drawing helper is handed a deep copy rather than the caller's image.
    annotated_image = draw_ocr_bboxes(
        copy.deepcopy(image),
        region_response[region_task],
    )

    return annotated_image, text