Delete src/utils.py
Browse files- src/utils.py +0 -81
src/utils.py
DELETED
@@ -1,81 +0,0 @@
|
|
1 |
-
# Necessary imports
|
2 |
-
import re
|
3 |
-
import supervision as sv
|
4 |
-
from PIL import Image
|
5 |
-
|
6 |
-
|
7 |
-
# Text cleaning function
|
8 |
-
def clean_text(text):
    """
    Cleans the given text by removing special tokens and normalizing spacing.

    The "<pad>" and "</s>" tokens are removed, every run of whitespace
    (including newlines) is collapsed into a single space, and a space is
    inserted after punctuation and between words that were joined together.

    Numbers such as "3.14" or "1,000", punctuation runs such as "..." or
    "?!", and punctuation directly followed by a closing bracket are left
    intact instead of being split apart.

    Args:
        text (str): The input text to be cleaned. An empty string or None
            yields an empty string.

    Returns:
        str: The cleaned and properly formatted text.
    """
    # Nothing to clean for empty / missing input
    if not text:
        return ""

    # Remove unwanted tokens
    text = text.replace("<pad>", "").replace("</s>", "")

    # Collapse all whitespace (spaces, tabs, newlines, blank lines) into
    # single spaces and drop leading/trailing whitespace in one pass
    cleaned_text = " ".join(text.split())

    # Add a space after . , ! ? when the next character is not whitespace,
    # not more punctuation and not a closing bracket. The trailing negative
    # lookahead skips separators sitting between two digits (3.14, 1,000).
    cleaned_text = re.sub(
        r"(?<=[.,!?])(?=[^\s.,!?)\]}])(?!(?<=\d[.,])\d)", " ", cleaned_text
    )

    # Add a space where a lowercase letter is directly followed by an
    # uppercase letter (e.g. "helloWorld" -> "hello World")
    cleaned_text = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", cleaned_text)

    # Add a space before a capitalized word that is glued to a preceding
    # word character (e.g. "HTMLParser" -> "HTML Parser")
    cleaned_text = re.sub(r"(\w)([A-Z][a-z])", r"\1 \2", cleaned_text)

    # Return the cleaned text
    return cleaned_text
|
45 |
-
|
46 |
-
|
47 |
-
# Draw OCR bounding boxes with enhanced visual elements
|
48 |
-
def draw_ocr_bboxes(image: Image.Image, detections: sv.Detections) -> Image.Image:
    """
    Draws bounding boxes and labels on a copy of the input image based on the OCR detections.

    Args:
        - image (PIL.Image.Image): The input image on which to draw the bounding boxes and labels.
          It is copied first, so the caller's image is not modified.
        - detections (sv.Detections): The OCR detections passed to the box and label annotators.

    Returns:
        PIL.Image.Image: The annotated image with bounding boxes and labels.
    """
    # Copy the input image to avoid modifying the original image
    annotated_image = image.copy()

    # Derive line thickness and text scale from the image resolution so the
    # annotations stay proportionate to the image size
    resolution_wh = image.size
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)

    # Initialize the bounding box and label annotators; both look colors up
    # by detection index
    box_annotator = sv.BoxAnnotator(
        color_lookup=sv.ColorLookup.INDEX, thickness=thickness
    )
    label_annotator = sv.LabelAnnotator(
        color_lookup=sv.ColorLookup.INDEX,
        text_scale=text_scale,
        text_thickness=thickness,
    )

    # Annotate the image with bounding boxes first, then labels on top
    annotated_image = box_annotator.annotate(annotated_image, detections)
    annotated_image = label_annotator.annotate(annotated_image, detections)

    # Return the annotated image
    return annotated_image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|