Upload 7 files
Browse files- src/app/__init__.py +0 -0
- src/app/model.py +90 -0
- src/app/task.py +56 -0
- src/exception.py +50 -0
- src/logger.py +21 -0
- src/utils/__init__.py +0 -0
- src/utils/processing.py +104 -0
src/app/__init__.py
ADDED
File without changes
|
src/app/model.py
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importing necessary libraries
|
2 |
+
import os
import subprocess
import sys
from typing import Optional

from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM
import spaces

# Local imports
from src.logger import logging
from src.exception import CustomExceptionHandling
|
12 |
+
|
13 |
+
|
14 |
+
# Install the required dependencies when this module is imported
try:
    subprocess.run(
        # Run pip through the current interpreter so the packages land in the
        # environment this module executes in; a list of arguments avoids
        # going through a shell.
        [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"],
        # ``env`` replaces the child's environment wholesale, so extend the
        # inherited one (PATH, HOME, proxy settings, ...) rather than passing
        # only the single override.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        check=True,
    )
    logging.info("Dependencies installed successfully.")

# Handle exceptions that may occur during the process
except Exception as e:
    # Custom exception handling
    raise CustomExceptionHandling(e, sys) from e
|
28 |
+
|
29 |
+
# Hugging Face checkpoint shared by the model and its processor
model_id = "microsoft/Florence-2-large-ft"
try:
    # Load the weights, move them to the GPU and put the model in eval mode
    model = AutoModelForCausalLM.from_pretrained(
        model_id, trust_remote_code=True
    ).to("cuda").eval()

    # The processor is loaded from the same checkpoint
    processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)
    logging.info("Model and processor loaded successfully.")

# Wrap any loading failure in the project's exception type
except Exception as e:
    raise CustomExceptionHandling(e, sys) from e
|
44 |
+
|
45 |
+
|
46 |
+
@spaces.GPU(duration=120)
def run_example(
    task_prompt: str, image: Image.Image, text_input: Optional[str] = None
) -> dict:
    """
    Runs one task on an image with the module-level model and processor.

    Args:
        - task_prompt (str): The task prompt for the example (e.g. "<OCR>").
        - image (PIL.Image.Image): The image to be processed.
        - text_input (str, optional): Additional text appended to the task prompt. Defaults to None.

    Returns:
        dict: The parsed answer returned by ``processor.post_process_generation``.
        Callers in this project read it by task prompt, e.g. ``result["<OCR>"]``.

    Raises:
        CustomExceptionHandling: If any step of processing or generation fails.
    """
    try:
        # If there is no text input, use the task prompt as the prompt
        prompt = task_prompt if text_input is None else task_prompt + text_input

        # Process the image and text input, then move the tensors to the GPU
        inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda")

        # Generate the answer with beam search (sampling disabled)
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            early_stopping=False,
            do_sample=False,
            num_beams=3,
        )

        # Special tokens are kept in the decoded text that is handed to the
        # post-processing step (presumably required for parsing; confirm
        # against the processor's documentation).
        generated_text = processor.batch_decode(
            generated_ids, skip_special_tokens=False
        )[0]
        parsed_answer = processor.post_process_generation(
            generated_text, task=task_prompt, image_size=(image.width, image.height)
        )

        # Return the parsed answer
        return parsed_answer

    # Handle exceptions that may occur during the process
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
|
src/app/task.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Import necessary libraries
|
2 |
+
import sys
|
3 |
+
import copy
|
4 |
+
from typing import Tuple
|
5 |
+
from PIL import Image
|
6 |
+
import supervision as sv
|
7 |
+
|
8 |
+
# Local imports
|
9 |
+
from src.utils.processing import clean_text, draw_ocr_bboxes
|
10 |
+
from src.app.model import run_example
|
11 |
+
from src.logger import logging
|
12 |
+
from src.exception import CustomExceptionHandling
|
13 |
+
|
14 |
+
|
15 |
+
def ocr_task(image: Image.Image) -> Tuple[Image.Image, str]:
    """
    Perform OCR (Optical Character Recognition) on the given image.

    The model is run twice: once with the plain OCR prompt to obtain the text,
    and once with the region prompt to obtain the detections that are drawn
    on a copy of the image.

    Args:
        image (PIL.Image.Image): The input image to perform OCR on.

    Returns:
        tuple: The annotated output image and the cleaned OCR text.

    Raises:
        CustomExceptionHandling: If any step of the OCR pipeline fails.
    """
    try:
        # Task prompts
        ocr_prompt = "<OCR>"
        ocr_with_region_prompt = "<OCR_WITH_REGION>"

        # Get OCR text; the parsed result is read with the prompt that produced it
        ocr_results = run_example(ocr_prompt, image)
        cleaned_text = clean_text(ocr_results[ocr_prompt])

        # Log the successful extraction and cleaning of OCR text
        logging.info("OCR text extracted and cleaned successfully.")

        # Get OCR with region and convert the result into detections
        ocr_with_region_results = run_example(ocr_with_region_prompt, image)
        detections = sv.Detections.from_lmm(
            lmm=sv.LMM.FLORENCE_2,
            result=ocr_with_region_results,
            resolution_wh=image.size,
        )

        # draw_ocr_bboxes annotates its own copy of the image, so no separate
        # copy is made here.
        output_image = draw_ocr_bboxes(image, detections)

        # Log the successful drawing of OCR bounding boxes
        logging.info("OCR bounding boxes drawn successfully.")

        # Return the output image and cleaned OCR text
        return output_image, cleaned_text

    # Handle exceptions that may occur during the process
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
|
src/exception.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This module defines a custom exception handling class and a function to get error message with details of the error.
|
3 |
+
"""
|
4 |
+
|
5 |
+
# Standard Library
|
6 |
+
import sys
|
7 |
+
|
8 |
+
# Local imports
|
9 |
+
from src.logger import logging
|
10 |
+
|
11 |
+
|
12 |
+
# Build an error message that includes the file name and line number of the error
def get_error_message(error, error_detail: sys):
    """
    Get error message with details of the error.

    Args:
        - error (Exception): The error that occurred.
        - error_detail (sys): The ``sys`` module (or any object exposing ``exc_info()``).

    Returns:
        str: A string containing the error message along with the file name and
        line number taken from the traceback. Both are reported as
        ``<unknown>`` when no traceback is available.
    """
    _, _, exc_tb = error_detail.exc_info()

    # Outside an ``except`` block exc_info() yields (None, None, None); fall
    # back to the traceback attached to the exception object, if there is one.
    if exc_tb is None:
        exc_tb = getattr(error, "__traceback__", None)

    # Get error details, tolerating the case where no traceback exists at all
    if exc_tb is None:
        file_name = "<unknown>"
        line_number = "<unknown>"
    else:
        file_name = exc_tb.tb_frame.f_code.co_filename
        line_number = exc_tb.tb_lineno

    return "Error occured in python script name [{0}] line number [{1}] error message[{2}]".format(
        file_name, line_number, str(error)
    )
|
31 |
+
|
32 |
+
|
33 |
+
# Project-wide exception type carrying a formatted, location-aware message
class CustomExceptionHandling(Exception):
    """
    Custom Exception Handling:
    Exception raised by this project's modules when an operation fails.
    It is built from the original error and an error-detail object, and its
    string form is the formatted message produced by ``get_error_message``.
    """

    def __init__(self, error_message, error_detail: sys):
        """Format the message, then initialize the base exception."""
        formatted = get_error_message(error_message, error_detail=error_detail)
        super().__init__(error_message)

        # Formatted text returned by __str__
        self.error_message = formatted

    def __str__(self):
        """Return the formatted error message."""
        return self.error_message
|
src/logger.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Importing the required modules
|
2 |
+
import os
|
3 |
+
import logging
|
4 |
+
from datetime import datetime
|
5 |
+
|
6 |
+
# Name the log file after the date and time at which this module is imported
LOG_FILE = f"{datetime.now().strftime('%m_%d_%Y_%H_%M_%S')}.log"

# Create the shared "logs" folder if it does not exist. Only the folder is
# created here; the log file name must not be part of this path, otherwise
# every run would create a directory named after the log file.
logs_path = os.path.join(os.getcwd(), "logs")
os.makedirs(logs_path, exist_ok=True)

# Full path of this run's log file inside the logs folder
LOG_FILE_PATH = os.path.join(logs_path, LOG_FILE)

# Configure the root logger to write INFO and above to the log file
logging.basicConfig(
    filename=LOG_FILE_PATH,
    format="[ %(asctime)s ] %(lineno)d %(name)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
|
src/utils/__init__.py
ADDED
File without changes
|
src/utils/processing.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Necessary imports
|
2 |
+
import sys
|
3 |
+
import re
|
4 |
+
from PIL import Image
|
5 |
+
import supervision as sv
|
6 |
+
|
7 |
+
# Local imports
|
8 |
+
from src.logger import logging
|
9 |
+
from src.exception import CustomExceptionHandling
|
10 |
+
|
11 |
+
|
12 |
+
# Text cleaning function
def clean_text(text: str) -> str:
    """
    Cleans the given text by removing unwanted tokens, extra spaces,
    and ensures proper spacing between words and after punctuation marks.

    Numbers such as "3.14" or "1,000" and runs of punctuation such as "..."
    are left intact.

    Args:
        text (str): The input text to be cleaned.

    Returns:
        str: The cleaned and properly formatted text.

    Raises:
        CustomExceptionHandling: If the text cannot be processed.
    """
    try:
        # Remove unwanted tokens
        text = text.replace("<pad>", "").replace("</s>", "").strip()

        # Split the text into lines and clean each line
        lines = text.split("\n")
        cleaned_lines = [line.strip() for line in lines if line.strip()]

        # Join the cleaned lines into a single string with a space between each line
        cleaned_text = " ".join(cleaned_lines)

        # Replace runs of whitespace with a single space
        cleaned_text = re.sub(r"\s+", " ", cleaned_text)

        # Add a space after punctuation that is glued to the next word.
        # No space is inserted before further punctuation (keeps "..." and
        # "?!" together) or when a "." / "," sits between two digits (keeps
        # "3.14" and "1,000" together).
        cleaned_text = re.sub(
            r"(?<=[.,!?])(?=[^\s.,!?])(?!(?<=\d[.,])\d)", " ", cleaned_text
        )

        # Add space between joined words where a lowercase letter is followed
        # by an uppercase letter
        cleaned_text = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", cleaned_text)

        # Add space between camel case words
        cleaned_text = re.sub(r"(\w)([A-Z][a-z])", r"\1 \2", cleaned_text)

        # Log the successful text cleaning
        logging.info("Text cleaned successfully.")

        # Return the cleaned text
        return cleaned_text

    # Handle exceptions that may occur during the process
    except Exception as e:
        # Custom exception handling
        raise CustomExceptionHandling(e, sys) from e
|
59 |
+
|
60 |
+
|
61 |
+
# Render OCR detections (boxes and labels) onto a copy of the image
def draw_ocr_bboxes(image: Image.Image, detections: sv.Detections) -> Image.Image:
    """
    Returns a copy of the image annotated with the given OCR detections.

    Args:
        - image (PIL.Image.Image): The image to annotate; it is copied, not modified.
        - detections (sv.Detections): The OCR detections to draw.

    Returns:
        PIL.Image.Image: The annotated copy with bounding boxes and labels.
    """
    try:
        # Line thickness and text scale are derived from the image resolution
        resolution = image.size
        line_thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution)
        label_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution)

        # Both annotators pick colors by detection index
        box_painter = sv.BoundingBoxAnnotator(
            color_lookup=sv.ColorLookup.INDEX, thickness=line_thickness
        )
        label_painter = sv.LabelAnnotator(
            color_lookup=sv.ColorLookup.INDEX,
            text_scale=label_scale,
            text_thickness=line_thickness,
        )

        # Work on a copy so the caller's image stays untouched; boxes are
        # drawn first, labels second
        canvas = image.copy()
        canvas = box_painter.annotate(canvas, detections)
        canvas = label_painter.annotate(canvas, detections)

        # Log the successful annotation
        logging.info("Bounding boxes and labels drawn successfully.")

        return canvas

    # Wrap any failure in the project's exception type
    except Exception as e:
        raise CustomExceptionHandling(e, sys) from e
|