WebashalarForML committed on
Commit
d994215
1 Parent(s): 6c1cb89

Update utility/utils.py

Browse files
Files changed (1) hide show
  1. utility/utils.py +34 -36
utility/utils.py CHANGED
@@ -32,15 +32,16 @@ def draw_boxes(image, bounds, color='red', width=2):
32
  draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
33
  return image
34
 
35
- #Image Quality upscaling
36
  # Load image using OpenCV
37
  def load_image(image_path):
38
- return cv2.imread(image_path)
 
 
 
39
 
40
- # Function for upscaling image using OpenCV's INTER_CUBIC or ESRGAN (if available)
41
  def upscale_image(image, scale=2):
42
  height, width = image.shape[:2]
43
- # Simple upscaling using cubic interpolation
44
  upscaled_image = cv2.resize(image, (width * scale, height * scale), interpolation=cv2.INTER_CUBIC)
45
  return upscaled_image
46
 
@@ -58,11 +59,9 @@ def sharpen_image(image):
58
 
59
  # Function to increase contrast and enhance details without changing color
60
  def enhance_image(image):
61
- # Convert from BGR to RGB for PIL processing, then back to BGR
62
  pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
63
  enhancer = ImageEnhance.Contrast(pil_img)
64
  enhanced_image = enhancer.enhance(1.5)
65
- # Convert back to BGR
66
  enhanced_image_bgr = cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
67
  return enhanced_image_bgr
68
 
@@ -70,59 +69,58 @@ def enhance_image(image):
70
  def process_image(image_path, scale=2):
71
  # Load the image
72
  image = load_image(image_path)
73
-
74
  # Upscale the image
75
  upscaled_image = upscale_image(image, scale)
76
-
77
  # Reduce noise
78
  denoised_image = reduce_noise(upscaled_image)
79
-
80
  # Sharpen the image
81
  sharpened_image = sharpen_image(denoised_image)
82
-
83
  # Enhance the image contrast and details without changing color
84
  final_image = enhance_image(sharpened_image)
85
-
86
- return final_image
87
 
 
88
 
89
  def ocr_with_paddle(img):
90
  finaltext = ''
91
  model_dir = os.getenv('PADDLEOCR_MODEL_DIR', '/tmp/.paddleocr')
92
  ocr = PaddleOCR(lang='en', use_angle_cls=True, det_model_dir=model_dir)
93
- # img_path = 'exp.jpeg'
94
  result = ocr.ocr(img)
95
-
96
  for i in range(len(result[0])):
97
  text = result[0][i][1][0]
98
- finaltext += ' '+ text
99
  return finaltext
100
 
101
  def extract_text_from_images(image_paths, RESULT_FOLDER):
102
  all_extracted_texts = {}
103
- all_extracted_imgs={}
104
  for image_path in image_paths:
105
- # Enhance the image before OCR
106
- enhanced_image = process_image(image_path, scale=2)
107
- #bounds = reader.readtext(enhanced_image)
108
- # Draw boxes on the processed image
109
- img_result = Image.fromarray(enhanced_image)
110
- #draw_boxes(img_result, bounds)
111
-
112
- result_image_path = os.path.join(RESULT_FOLDER, f'result_{os.path.basename(image_path)}')
113
- img_result.save(result_image_path) # Save the processed image
114
-
115
- # Perform OCR on the enhanced image
116
- result=ocr_with_paddle(enhanced_image)
117
- # results = reader.readtext(enhanced_image)
118
- # extracted_text = " ".join([res[1] for res in results])
119
-
120
- all_extracted_texts[image_path] =result
121
- all_extracted_imgs[image_path] = result_image_path
122
- # Convert to JSON-compatible structure
123
- all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
124
- return all_extracted_texts,all_extracted_imgs_json
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
 
127
  # Function to call the Gemma model and process the output as Json
128
  def Data_Extractor(data, client=client):
 
32
  draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
33
  return image
34
 
 
35
  # Load image using OpenCV
36
  def load_image(image_path):
37
+ image = cv2.imread(image_path)
38
+ if image is None:
39
+ raise ValueError(f"Could not load image from {image_path}. It may be corrupted or the path is incorrect.")
40
+ return image
41
 
42
+ # Function for upscaling image using OpenCV's INTER_CUBIC
43
  def upscale_image(image, scale=2):
44
  height, width = image.shape[:2]
 
45
  upscaled_image = cv2.resize(image, (width * scale, height * scale), interpolation=cv2.INTER_CUBIC)
46
  return upscaled_image
47
 
 
59
 
60
  # Function to increase contrast and enhance details without changing color
61
  def enhance_image(image):
 
62
  pil_img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
63
  enhancer = ImageEnhance.Contrast(pil_img)
64
  enhanced_image = enhancer.enhance(1.5)
 
65
  enhanced_image_bgr = cv2.cvtColor(np.array(enhanced_image), cv2.COLOR_RGB2BGR)
66
  return enhanced_image_bgr
67
 
 
69
  def process_image(image_path, scale=2):
70
  # Load the image
71
  image = load_image(image_path)
72
+
73
  # Upscale the image
74
  upscaled_image = upscale_image(image, scale)
75
+
76
  # Reduce noise
77
  denoised_image = reduce_noise(upscaled_image)
78
+
79
  # Sharpen the image
80
  sharpened_image = sharpen_image(denoised_image)
81
+
82
  # Enhance the image contrast and details without changing color
83
  final_image = enhance_image(sharpened_image)
 
 
84
 
85
+ return final_image
86
 
87
  def ocr_with_paddle(img):
88
  finaltext = ''
89
  model_dir = os.getenv('PADDLEOCR_MODEL_DIR', '/tmp/.paddleocr')
90
  ocr = PaddleOCR(lang='en', use_angle_cls=True, det_model_dir=model_dir)
 
91
  result = ocr.ocr(img)
92
+
93
  for i in range(len(result[0])):
94
  text = result[0][i][1][0]
95
+ finaltext += ' ' + text
96
  return finaltext
97
 
98
  def extract_text_from_images(image_paths, RESULT_FOLDER):
99
  all_extracted_texts = {}
100
+ all_extracted_imgs = {}
101
  for image_path in image_paths:
102
+ try:
103
+ # Enhance the image before OCR
104
+ enhanced_image = process_image(image_path, scale=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
+ # Draw boxes on the processed image (optional, requires bounds)
107
+ img_result = Image.fromarray(enhanced_image)
108
+
109
+ result_image_path = os.path.join(RESULT_FOLDER, f'result_{os.path.basename(image_path)}')
110
+ img_result.save(result_image_path) # Save the processed image
111
+
112
+ # Perform OCR on the enhanced image
113
+ result = ocr_with_paddle(enhanced_image)
114
+
115
+ all_extracted_texts[image_path] = result
116
+ all_extracted_imgs[image_path] = result_image_path
117
+ except ValueError as ve:
118
+ print(f"Error processing image {image_path}: {ve}")
119
+ continue # Continue to the next image if there's an error
120
+
121
+ # Convert to JSON-compatible structure
122
+ all_extracted_imgs_json = {str(k): str(v) for k, v in all_extracted_imgs.items()}
123
+ return all_extracted_texts, all_extracted_imgs_json
124
 
125
  # Function to call the Gemma model and process the output as Json
126
  def Data_Extractor(data, client=client):