Spaces:
Sleeping
Sleeping
WebashalarForML
committed on
Commit
•
d994215
1
Parent(s):
6c1cb89
Update utility/utils.py
Browse files- utility/utils.py +34 -36
utility/utils.py
CHANGED
@@ -32,15 +32,16 @@ def draw_boxes(image, bounds, color='red', width=2):
|
|
32 |
draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
|
33 |
return image
|
34 |
|
35 |
-
#Image Quality upscaling
|
36 |
# Load image using OpenCV
|
37 |
def load_image(image_path):
    """Load an image from disk using OpenCV.

    Args:
        image_path: Path passed to ``cv2.imread``.

    Returns:
        The array returned by ``cv2.imread``.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None``, which is how it
            reports a file it could not read.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Could not load image from {image_path}. It may be corrupted or the path is incorrect.")
    return image
|
|
|
|
|
|
|
39 |
|
40 |
-
# Function for upscaling image using OpenCV's INTER_CUBIC
|
41 |
def upscale_image(image, scale=2):
    """Resize an image by ``scale`` using bicubic interpolation.

    Args:
        image: Array whose first two ``shape`` entries are height and width.
        scale: Multiplier applied to both dimensions. May be fractional;
            the target size is rounded to whole pixels.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    height, width = image.shape[:2]
    # cv2.resize requires an integer (width, height) tuple, so a fractional
    # scale such as 1.5 must be rounded rather than passed through as float.
    target_size = (int(round(width * scale)), int(round(height * scale)))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
|
46 |
|
@@ -58,11 +59,9 @@ def sharpen_image(image):
|
|
58 |
|
59 |
# Function to increase contrast and enhance details without changing color
|
60 |
def enhance_image(image, contrast=1.5):
    """Increase the contrast of a BGR image via PIL's ``ImageEnhance``.

    The image is converted from BGR to RGB for PIL, run through
    ``ImageEnhance.Contrast``, and converted back to BGR.

    Args:
        image: BGR image array (it is fed to ``cv2.cvtColor`` with
            ``COLOR_BGR2RGB``).
        contrast: Factor handed to ``ImageEnhance.Contrast.enhance``.
            Defaults to 1.5, the value that used to be hard-coded.

    Returns:
        The contrast-adjusted image as a BGR array.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    boosted = ImageEnhance.Contrast(Image.fromarray(rgb)).enhance(contrast)
    return cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)
|
68 |
|
@@ -70,59 +69,58 @@ def enhance_image(image):
|
|
70 |
def process_image(image_path, scale=2):
    """Run the full enhancement pipeline on one image file.

    Steps, in order: load, upscale by ``scale``, reduce noise, sharpen,
    then enhance contrast.

    Args:
        image_path: Path given to ``load_image``.
        scale: Factor given to ``upscale_image``.

    Returns:
        The output of the last stage, ``enhance_image``.
    """
    current = upscale_image(load_image(image_path), scale)
    # The remaining stages each take the previous stage's image and return
    # a new one, so they can be applied in sequence.
    for stage in (reduce_noise, sharpen_image, enhance_image):
        current = stage(current)
    return current
|
87 |
|
|
|
88 |
|
89 |
def ocr_with_paddle(img):
    """Run PaddleOCR on an image and return the recognised text.

    Args:
        img: Input forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        All recognised text fragments concatenated, each preceded by a
        single space (so a non-empty result starts with a space). Returns
        an empty string when nothing was recognised.
    """
    model_dir = os.getenv('PADDLEOCR_MODEL_DIR', '/tmp/.paddleocr')
    # NOTE(review): a fresh PaddleOCR instance is constructed on every call.
    ocr = PaddleOCR(lang='en', use_angle_cls=True, det_model_dir=model_dir)
    result = ocr.ocr(img)

    # The first entry can be None or empty when no text is detected;
    # indexing or len() on it would raise.
    if not result or not result[0]:
        return ''

    # Each detection is indexed as entry[1][0] to get its text.
    return ''.join(f' {entry[1][0]}' for entry in result[0])
|
100 |
|
101 |
def extract_text_from_images(image_paths, RESULT_FOLDER):
    """Enhance each image, save the enhanced copy, and OCR it.

    Args:
        image_paths: Iterable of image file paths.
        RESULT_FOLDER: Directory where ``result_<basename>`` files are saved.

    Returns:
        A tuple ``(texts, images)``: ``texts`` maps each processed image
        path to its OCR text, and ``images`` maps ``str(path)`` to the
        ``str`` path of the saved enhanced image. Images that raise
        ``ValueError`` during processing are reported and skipped.
    """
    all_extracted_texts = {}
    all_extracted_imgs = {}

    # Make sure the output directory exists before the first save.
    if RESULT_FOLDER:
        os.makedirs(RESULT_FOLDER, exist_ok=True)

    for image_path in image_paths:
        try:
            # Enhance the image before OCR.
            enhanced_image = process_image(image_path, scale=2)

            # The pipeline yields a BGR array while PIL treats a 3-channel
            # array as RGB, so convert first or the saved file has red and
            # blue swapped.
            img_result = Image.fromarray(cv2.cvtColor(enhanced_image, cv2.COLOR_BGR2RGB))
            result_image_path = os.path.join(RESULT_FOLDER, f'result_{os.path.basename(image_path)}')
            img_result.save(result_image_path)

            # OCR runs on the enhanced array itself, not on the saved copy.
            all_extracted_texts[image_path] = ocr_with_paddle(enhanced_image)
            all_extracted_imgs[image_path] = result_image_path
        except ValueError as ve:
            print(f"Error processing image {image_path}: {ve}")
            continue  # Move on to the next image.

    # Convert to a JSON-compatible structure.
    all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
    return all_extracted_texts, all_extracted_imgs_json
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
# Function to call the Gemma model and process the output as Json
|
128 |
def Data_Extractor(data, client=client):
|
|
|
32 |
draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
|
33 |
return image
|
34 |
|
|
|
35 |
# Load image using OpenCV
|
36 |
def load_image(image_path):
    """Read an image file from disk with OpenCV.

    Args:
        image_path: Path handed to ``cv2.imread``.

    Returns:
        The array produced by ``cv2.imread``.

    Raises:
        ValueError: If ``cv2.imread`` yields ``None`` instead of an image.
    """
    loaded = cv2.imread(image_path)
    if loaded is not None:
        return loaded
    # imread reports failure by returning None rather than raising.
    raise ValueError(f"Could not load image from {image_path}. It may be corrupted or the path is incorrect.")
|
41 |
|
42 |
+
# Function for upscaling image using OpenCV's INTER_CUBIC
|
43 |
def upscale_image(image, scale=2):
    """Resize an image by ``scale`` using bicubic interpolation.

    Args:
        image: Array whose first two ``shape`` entries are height and width.
        scale: Multiplier applied to both dimensions. May be fractional;
            the target size is rounded to whole pixels.

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    height, width = image.shape[:2]
    # cv2.resize requires an integer (width, height) tuple, so a fractional
    # scale such as 1.5 must be rounded rather than passed through as float.
    target_size = (int(round(width * scale)), int(round(height * scale)))
    return cv2.resize(image, target_size, interpolation=cv2.INTER_CUBIC)
|
47 |
|
|
|
59 |
|
60 |
# Function to increase contrast and enhance details without changing color
|
61 |
def enhance_image(image, contrast=1.5):
    """Increase the contrast of a BGR image via PIL's ``ImageEnhance``.

    The image is converted from BGR to RGB for PIL, run through
    ``ImageEnhance.Contrast``, and converted back to BGR.

    Args:
        image: BGR image array (it is fed to ``cv2.cvtColor`` with
            ``COLOR_BGR2RGB``).
        contrast: Factor handed to ``ImageEnhance.Contrast.enhance``.
            Defaults to 1.5, the value that used to be hard-coded.

    Returns:
        The contrast-adjusted image as a BGR array.
    """
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    boosted = ImageEnhance.Contrast(Image.fromarray(rgb)).enhance(contrast)
    return cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)
|
67 |
|
|
|
69 |
def process_image(image_path, scale=2):
    """Run the full enhancement pipeline on one image file.

    Steps, in order: load, upscale by ``scale``, reduce noise, sharpen,
    then enhance contrast.

    Args:
        image_path: Path given to ``load_image``.
        scale: Factor given to ``upscale_image``.

    Returns:
        The output of the last stage, ``enhance_image``.
    """
    current = upscale_image(load_image(image_path), scale)
    # The remaining stages each take the previous stage's image and return
    # a new one, so they can be applied in sequence.
    for stage in (reduce_noise, sharpen_image, enhance_image):
        current = stage(current)
    return current
|
86 |
|
87 |
def ocr_with_paddle(img):
    """Run PaddleOCR on an image and return the recognised text.

    Args:
        img: Input forwarded unchanged to ``PaddleOCR.ocr``.

    Returns:
        All recognised text fragments concatenated, each preceded by a
        single space (so a non-empty result starts with a space). Returns
        an empty string when nothing was recognised.
    """
    model_dir = os.getenv('PADDLEOCR_MODEL_DIR', '/tmp/.paddleocr')
    # NOTE(review): a fresh PaddleOCR instance is constructed on every call.
    ocr = PaddleOCR(lang='en', use_angle_cls=True, det_model_dir=model_dir)
    result = ocr.ocr(img)

    # The first entry can be None or empty when no text is detected;
    # indexing or len() on it would raise.
    if not result or not result[0]:
        return ''

    # Each detection is indexed as entry[1][0] to get its text.
    return ''.join(f' {entry[1][0]}' for entry in result[0])
|
97 |
|
98 |
def extract_text_from_images(image_paths, RESULT_FOLDER):
    """Enhance each image, save the enhanced copy, and OCR it.

    Args:
        image_paths: Iterable of image file paths.
        RESULT_FOLDER: Directory where ``result_<basename>`` files are saved.

    Returns:
        A tuple ``(texts, images)``: ``texts`` maps each processed image
        path to its OCR text, and ``images`` maps ``str(path)`` to the
        ``str`` path of the saved enhanced image. Images that raise
        ``ValueError`` during processing are reported and skipped.
    """
    all_extracted_texts = {}
    all_extracted_imgs = {}

    # Make sure the output directory exists before the first save.
    if RESULT_FOLDER:
        os.makedirs(RESULT_FOLDER, exist_ok=True)

    for image_path in image_paths:
        try:
            # Enhance the image before OCR.
            enhanced_image = process_image(image_path, scale=2)

            # The pipeline yields a BGR array while PIL treats a 3-channel
            # array as RGB, so convert first or the saved file has red and
            # blue swapped.
            img_result = Image.fromarray(cv2.cvtColor(enhanced_image, cv2.COLOR_BGR2RGB))
            result_image_path = os.path.join(RESULT_FOLDER, f'result_{os.path.basename(image_path)}')
            img_result.save(result_image_path)

            # OCR runs on the enhanced array itself, not on the saved copy.
            all_extracted_texts[image_path] = ocr_with_paddle(enhanced_image)
            all_extracted_imgs[image_path] = result_image_path
        except ValueError as ve:
            print(f"Error processing image {image_path}: {ve}")
            continue  # Move on to the next image.

    # Convert to a JSON-compatible structure.
    all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
    return all_extracted_texts, all_extracted_imgs_json
|
124 |
|
125 |
# Function to call the Gemma model and process the output as Json
|
126 |
def Data_Extractor(data, client=client):
|