Spaces:

sitammeur
/

TextSnap

Sleeping

App Files Files Community

sitammeur committed on Aug 21

Commit

c45cf35

•

1 Parent(s): 9442785

Upload 7 files

Browse files

Files changed (7) hide show

src/app/__init__.py +0 -0
src/app/model.py +90 -0
src/app/task.py +56 -0
src/exception.py +50 -0
src/logger.py +21 -0
src/utils/__init__.py +0 -0
src/utils/processing.py +104 -0

src/app/__init__.py ADDED Viewed

File without changes

src/app/model.py ADDED Viewed

	@@ -0,0 +1,90 @@

+# Importing necessary libraries
+import sys
+import subprocess
+from typing import Optional
+from PIL import Image
+from transformers import AutoProcessor, AutoModelForCausalLM
+import spaces
+# Local imports
+from src.logger import logging
+from src.exception import CustomExceptionHandling
+# Install the required dependencies
+try:
+    subprocess.run(
+        "pip install -r requirements.txt",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+        check=True,
+    )
+    logging.info("Dependencies installed successfully.")
+# Handle exceptions that may occur during the process
+except Exception as e:
+    # Custom exception handling
+    raise CustomExceptionHandling(e, sys) from e
+# Load model and processor from Hugging Face
+model_id = "microsoft/Florence-2-large-ft"
+try:
+    model = (
+        AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)
+        .to("cuda")
+        .eval()
+    )
+    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
+    logging.info("Model and processor loaded successfully.")
+# Handle exceptions that may occur during the process
+except Exception as e:
+    # Custom exception handling
+    raise CustomExceptionHandling(e, sys) from e
+@spaces.GPU(duration=120)
+def run_example(
+    task_prompt: str, image: Image.Image, text_input: Optional[str] = None
+) -> str:
+    """
+    Runs an example using the given task prompt and image.
+    Args:
+        - task_prompt (str): The task prompt for the example.
+        - image (PIL.Image.Image): The image to be processed.
+        - text_input (str, optional): Additional text input to be appended to the task prompt. Defaults to None.
+    Returns:
+        str: The parsed answer generated by the model.
+    """
+    try:
+        # If there is no text input, use the task prompt as the prompt
+        prompt = task_prompt if text_input is None else task_prompt + text_input
+        # Process the image and text input
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")
+        # Generate the answer using the model
+        generated_ids = model.generate(
+            input_ids=inputs["input_ids"],
+            pixel_values=inputs["pixel_values"],
+            max_new_tokens=1024,
+            early_stopping=False,
+            do_sample=False,
+            num_beams=3,
+        )
+        generated_text = processor.batch_decode(
+            generated_ids, skip_special_tokens=False
+        )[0]
+        parsed_answer = processor.post_process_generation(
+            generated_text, task=task_prompt, image_size=(image.width, image.height)
+        )
+        # Return the parsed answer
+        return parsed_answer
+    # Handle exceptions that may occur during the process
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e

src/app/task.py ADDED Viewed

	@@ -0,0 +1,56 @@

+# Import necessary libraries
+import sys
+import copy
+from typing import Tuple
+from PIL import Image
+import supervision as sv
+# Local imports
+from src.utils.processing import clean_text, draw_ocr_bboxes
+from src.app.model import run_example
+from src.logger import logging
+from src.exception import CustomExceptionHandling
+def ocr_task(image: Image.Image) -> Tuple[Image.Image, str]:
+    """
+    Perform OCR (Optical Character Recognition) on the given image.
+    Args:
+        image (PIL.Image.Image): The input image to perform OCR on.
+    Returns:
+        tuple: A tuple containing the output image with OCR bounding boxes drawn and the cleaned OCR text.
+    """
+    try:
+        # Task prompts
+        ocr_prompt = "<OCR>"
+        ocr_with_region_prompt = "<OCR_WITH_REGION>"
+        # Get OCR text
+        ocr_results = run_example(ocr_prompt, image)
+        cleaned_text = clean_text(ocr_results["<OCR>"])
+        # Log the successful extraction and cleaning of OCR text
+        logging.info("OCR text extracted and cleaned successfully.")
+        # Get OCR with region
+        ocr_with_region_results = run_example(ocr_with_region_prompt, image)
+        output_image = copy.deepcopy(image)
+        detections = sv.Detections.from_lmm(
+            lmm=sv.LMM.FLORENCE_2,
+            result=ocr_with_region_results,
+            resolution_wh=image.size,
+        )
+        output_image = draw_ocr_bboxes(image, detections)
+        # Log the successful drawing of OCR bounding boxes
+        logging.info("OCR bounding boxes drawn successfully.")
+        # Return the output image and cleaned OCR text
+        return output_image, cleaned_text
+    # Handle exceptions that may occur during the process
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e

src/exception.py ADDED Viewed

	@@ -0,0 +1,50 @@

+"""
+This module defines a custom exception handling class and a function to get error message with details of the error.
+"""
+# Standard Library
+import sys
+# Local imports
+from src.logger import logging
+# Function Definition to get error message with details of the error (file name and line number) when an error occurs in the program
+def get_error_message(error, error_detail: sys):
+    """
+    Get error message with details of the error.
+    Args:
+        - error (Exception): The error that occurred.
+        - error_detail (sys): The details of the error.
+    Returns:
+        str: A string containing the error message along with the file name and line number where the error occurred.
+    """
+    _, _, exc_tb = error_detail.exc_info()
+    # Get error details
+    file_name = exc_tb.tb_frame.f_code.co_filename
+    return "Error occured in python script name [{0}] line number [{1}] error message[{2}]".format(
+        file_name, exc_tb.tb_lineno, str(error)
+    )
+# Custom Exception Handling Class Definition
+class CustomExceptionHandling(Exception):
+    """
+    Custom Exception Handling:
+        This class defines a custom exception that can be raised when an error occurs in the program.
+        It takes an error message and an error detail as input and returns a formatted error message when the exception is raised.
+    """
+    # Constructor
+    def __init__(self, error_message, error_detail: sys):
+        """Initialize the exception"""
+        super().__init__(error_message)
+        self.error_message = get_error_message(error_message, error_detail=error_detail)
+    def __str__(self):
+        """String representation of the exception"""
+        return self.error_message

src/logger.py ADDED Viewed

	@@ -0,0 +1,21 @@

+# Importing the required modules
+import os
+import logging
+from datetime import datetime
+# Creating a log file with the current date and time as the name of the file
+LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"
+# Creating a logs folder if it does not exist
+logs_path = os.path.join(os.getcwd(), "logs", LOG_FILE)
+os.makedirs(logs_path, exist_ok=True)
+# Setting the log file path and the log level
+LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE)
+# Configuring the logger
+logging.basicConfig(
+    filename=LOG_FILE_PATH,
+    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
+    level=logging.INFO,
+)

src/utils/__init__.py ADDED Viewed

File without changes

src/utils/processing.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# Necessary imports
+import sys
+import re
+from PIL import Image
+import supervision as sv
+# Local imports
+from src.logger import logging
+from src.exception import CustomExceptionHandling
+# Text cleaning function
+def clean_text(text: str) -> str:
+    """
+    Cleans the given text by removing unwanted tokens, extra spaces,
+    and ensures proper spacing between words and after punctuation marks.
+    Args:
+        text (str): The input text to be cleaned.
+    Returns:
+        str: The cleaned and properly formatted text.
+    """
+    try:
+        # Remove unwanted tokens
+        text = text.replace("<pad>", "").replace("</s>", "").strip()
+        # Split the text into lines and clean each line
+        lines = text.split("\n")
+        cleaned_lines = [line.strip() for line in lines if line.strip()]
+        # Join the cleaned lines into a single string with a space between each line
+        cleaned_text = " ".join(cleaned_lines)
+        # Ensure proper spacing using regex
+        cleaned_text = re.sub(
+            r"\s+", " ", cleaned_text
+        )  # Replace multiple spaces with a single space
+        cleaned_text = re.sub(
+            r"(?<=[.,!?])(?=[^\s])", r" ", cleaned_text
+        )  # Add space after punctuation if not followed by a space
+        cleaned_text = re.sub(
+            r"(?<=[a-z])(?=[A-Z])", r" ", cleaned_text
+        )  # Add space between joined words where a lowercase letter is followed by an uppercase letter
+        cleaned_text = re.sub(
+            r"(\w)([A-Z][a-z])", r"\1 \2", cleaned_text
+        )  # Add space between camel case words
+        # Log the successful text cleaning
+        logging.info("Text cleaned successfully.")
+        # Return the cleaned text
+        return cleaned_text
+    # Handle exceptions that may occur during the process
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e
+# Draw OCR bounding boxes with enhanced visual elements
+def draw_ocr_bboxes(image: Image.Image, detections: sv.Detections) -> Image.Image:
+    """
+    Draws bounding boxes and labels on the input image based on the OCR detections.
+    Args:
+        - image (PIL.Image.Image): The input image on which to draw the bounding boxes and labels.
+        - detections (sv.Detections): The OCR detections containing the bounding box coordinates and labels.
+    Returns:
+        PIL.Image.Image: The annotated image with bounding boxes and labels.
+    """
+    try:
+        # Copy the input image to avoid modifying the original image
+        annotated_image = image.copy()
+        # Calculate the optimal line thickness and text scale based on the image resolution
+        thickness = sv.calculate_optimal_line_thickness(resolution_wh=image.size)
+        text_scale = sv.calculate_optimal_text_scale(resolution_wh=image.size)
+        # Initialize the bounding box and label annotators
+        bounding_box_annotator = sv.BoundingBoxAnnotator(
+            color_lookup=sv.ColorLookup.INDEX, thickness=thickness
+        )
+        label_annotator = sv.LabelAnnotator(
+            color_lookup=sv.ColorLookup.INDEX,
+            text_scale=text_scale,
+            text_thickness=thickness,
+        )
+        # Annotate the image with bounding boxes and labels
+        annotated_image = bounding_box_annotator.annotate(annotated_image, detections)
+        annotated_image = label_annotator.annotate(annotated_image, detections)
+        # Log the successful annotation
+        logging.info("Bounding boxes and labels drawn successfully.")
+        # Return the annotated image
+        return annotated_image
+    # Handle exceptions that may occur during the process
+    except Exception as e:
+        # Custom exception handling
+        raise CustomExceptionHandling(e, sys) from e