sitammeur committed on
Commit
6af85a2
1 Parent(s): ffe55fe

Delete src/utils.py

Browse files
Files changed (1) hide show
  1. src/utils.py +0 -81
src/utils.py DELETED
@@ -1,81 +0,0 @@
1
- # Necessary imports
2
- import re
3
- import supervision as sv
4
- from PIL import Image
5
-
6
-
7
- # Text cleaning function
8
def clean_text(text):
    """
    Cleans the given text by removing unwanted tokens, collapsing whitespace,
    and ensuring proper spacing between words and after punctuation marks.

    Args:
        text (str): The input text to be cleaned. ``None`` or an empty
            string yields an empty string.

    Returns:
        str: The cleaned text as a single line with single spaces.
    """
    # Nothing to clean: avoid calling str methods on None
    if not text:
        return ""

    # Remove unwanted tokens
    text = text.replace("<pad>", "").replace("</s>", "")

    # Collapse every run of whitespace (including newlines) into one space
    # and drop leading/trailing whitespace in a single pass
    cleaned_text = " ".join(text.split())

    # Add a space after punctuation that is glued to the next word, but leave
    # numbers ("3.14", "1,000"), ellipses ("...") and punctuation followed by
    # closing quotes/brackets untouched
    cleaned_text = re.sub(
        r"(?<=[.,!?])(?=\w)(?!(?<=\d[.,])\d)", " ", cleaned_text
    )

    # Add space between joined words where a lowercase letter is followed by
    # an uppercase letter ("helloWorld" -> "hello World")
    cleaned_text = re.sub(r"(?<=[a-z])(?=[A-Z])", " ", cleaned_text)

    # Add space before a capitalised word that directly follows another word
    # character ("HTMLParser" -> "HTML Parser")
    cleaned_text = re.sub(r"(\w)([A-Z][a-z])", r"\1 \2", cleaned_text)

    # Return the cleaned text
    return cleaned_text
45
-
46
-
47
- # Draw OCR bounding boxes with enhanced visual elements
48
def draw_ocr_bboxes(image: Image.Image, detections: sv.Detections) -> Image.Image:
    """
    Draws bounding boxes and labels on a copy of the input image based on the OCR detections.

    Args:
        - image (PIL.Image.Image): The input image; it is copied, not modified.
        - detections (sv.Detections): The OCR detections passed to the box and label annotators.

    Returns:
        PIL.Image.Image: The annotated copy with bounding boxes and labels.
    """
    # Work on a copy so the caller's image is left untouched
    annotated_image = image.copy()

    # Derive line thickness and text scale from the image resolution (width, height)
    resolution_wh = image.size
    thickness = sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh)
    text_scale = sv.calculate_optimal_text_scale(resolution_wh=resolution_wh)

    # Both annotators pick colours by detection index
    box_annotator = sv.BoxAnnotator(
        color_lookup=sv.ColorLookup.INDEX, thickness=thickness
    )
    label_annotator = sv.LabelAnnotator(
        color_lookup=sv.ColorLookup.INDEX,
        text_scale=text_scale,
        text_thickness=thickness,
    )

    # Draw the boxes first, then the labels on top of them
    annotated_image = box_annotator.annotate(annotated_image, detections)
    annotated_image = label_annotator.annotate(annotated_image, detections)

    # Return the annotated image
    return annotated_image