File size: 11,902 Bytes
3943768
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
import os

import numpy as np
from scipy.stats import mode

from utils import have_cv2, have_pillow
from enums import images_num_max_dict


def largest_contour(contours):
    """ Return the contour enclosing the greatest area.

    Areas are measured with ``cv2.contourArea``.  When several contours tie
    for the greatest area the earliest one is returned.  ``None`` is returned
    when ``contours`` is empty or no contour has an area above zero.
    """
    import cv2
    measured = [(cv2.contourArea(candidate), candidate) for candidate in contours]
    # max() keeps the first maximal entry, matching a strict ">" scan
    top_area, top_contour = max(measured, key=lambda pair: pair[0], default=(0, None))
    return top_contour if top_area > 0 else None


def is_contour_acceptable(contour, image, size_threshold=0.1, aspect_ratio_range=(0.5, 2), rotation_threshold=30):
    """ Check if the contour is acceptable based on size, aspect ratio, and rotation.

    :param contour: contour as accepted by ``cv2.contourArea``, ``cv2.boundingRect``
        and ``cv2.minAreaRect``
    :param image: array whose first two ``shape`` entries are height and width
    :param size_threshold: the contour area, as a fraction of the image area, must lie
        within ``[size_threshold, 1 - size_threshold]``
    :param aspect_ratio_range: ``(min, max)`` allowed width/height ratio of the
        axis-aligned bounding box
    :param rotation_threshold: largest allowed tilt, in degrees, of the minimum-area
        rectangle away from the nearest image axis
    :return: True when all three checks pass, else False
    """
    import cv2

    # Size check
    image_area = image.shape[0] * image.shape[1]
    if image_area <= 0:
        # degenerate image: nothing can be judged acceptable (and avoids dividing by zero)
        return False
    area_fraction = cv2.contourArea(contour) / image_area
    if area_fraction < size_threshold or area_fraction > 1 - size_threshold:
        return False

    # Aspect ratio check
    _, _, w, h = cv2.boundingRect(contour)
    if h <= 0:
        return False
    aspect_ratio = w / h
    if aspect_ratio < aspect_ratio_range[0] or aspect_ratio > aspect_ratio_range[1]:
        return False

    # Rotation check
    # The angle convention of minAreaRect differs between OpenCV releases
    # (negative quarter-turn range in older ones, positive in newer ones), and an
    # axis-aligned rectangle may be reported as 90.  Comparing the raw angle would
    # therefore either never reject or reject upright rectangles, so measure the
    # tilt from the nearest axis instead.
    _, _, angle = cv2.minAreaRect(contour)
    tilt = abs(angle) % 90
    tilt = min(tilt, 90 - tilt)
    if tilt > rotation_threshold:
        return False

    return True


def file_to_cv2(img_file):
    """ Load an image file as an OpenCV (BGR) array.

    ``cv2.imread`` is tried first.  When it cannot decode the file (e.g. a small
    black-and-white GIF), the file is opened with Pillow, converted to RGB and
    turned into a BGR array in memory, so no helper file is written next to the
    input.

    :param img_file: path of the image file to read
    :return: image array in BGR channel order
    :raises AssertionError: if ``img_file`` is not an existing file
    """
    import cv2

    # Check before decoding so a missing path is reported as such.  An explicit
    # raise (rather than an assert statement) keeps the check active under
    # ``python -O`` while preserving the exception type of the earlier assert.
    if not os.path.isfile(img_file):
        raise AssertionError('%s not found' % img_file)

    image = cv2.imread(img_file)
    if image is None:
        # e.g. small BW gif gridnumbers.gif
        from PIL import Image
        with Image.open(img_file) as pil_image:
            rgb = np.array(pil_image.convert('RGB'), dtype=np.uint8)
        # Convert RGB to BGR; make it contiguous since the reversed view has negative strides
        image = np.ascontiguousarray(rgb[:, :, ::-1])

    return image


def align_image(img_file):
    """ Try to straighten the dominant quadrilateral in an image.

    The image is blurred and edge-detected, and its largest contour is taken.
    If that contour passes ``is_contour_acceptable`` and approximates to a
    four-point polygon, the image is warped with ``four_point_transform`` and
    written to ``img_file + "_aligned.jpg"``.

    :param img_file: path of the image to process
    :return: path of the aligned image, or ``img_file`` unchanged when no usable
        contour is found or any error occurs during processing
    """
    import cv2
    from imutils.perspective import four_point_transform
    try:
        source = file_to_cv2(img_file)
        smoothed = cv2.GaussianBlur(cv2.cvtColor(source, cv2.COLOR_BGR2GRAY), (5, 5), 0)

        # Edges first, then every contour found on the edge map
        edge_map = cv2.Canny(smoothed, 50, 150, apertureSize=3)
        contours, _ = cv2.findContours(edge_map, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

        candidate = largest_contour(contours)
        if candidate is None or not is_contour_acceptable(candidate, source):
            print("No acceptable contours found.")
            return img_file

        # Simplify the contour to a polygon; only a quadrilateral can be warped
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) != 4:
            print("Contour is not a quadrilateral.")
            return img_file

        straightened = four_point_transform(source, polygon.reshape(4, 2))
        out_file = img_file + "_aligned.jpg"
        cv2.imwrite(out_file, straightened)
        return out_file
    except Exception as e:
        # best effort: fall back to the untouched input
        print("Error in align_image:", e, flush=True)
        return img_file


def correct_rotation(img_file, border_size=50):
    """ Deskew an image using the most frequent angle of its detected line segments.

    Line segments are found with a probabilistic Hough transform on the Canny edge
    map.  The most common segment angle (rounded to whole degrees) is folded into
    [-45, 45] degrees and the image is rotated by it about its centre.  The result
    is written to ``img_file + "_rotated.jpg"``.

    :param img_file: path of the image to process
    :param border_size: pixels to trim from every border after rotation; only used
        when the internal ``remove_border_final`` switch is enabled (it is off)
    :return: path of the rotated image, or ``img_file`` unchanged when no line
        segments are detected
    """
    import cv2

    # Load the image and build an edge map from its grayscale version
    image = file_to_cv2(img_file)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect points that form a line using HoughLinesP
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80, minLineLength=100, maxLineGap=10)
    if lines is None or len(lines) == 0:
        return img_file

    # One (x1, y1, x2, y2) row per segment; angle of each segment in degrees
    segments = np.asarray(lines).reshape(-1, 4)
    angles = np.degrees(np.arctan2(segments[:, 3] - segments[:, 1],
                                   segments[:, 2] - segments[:, 0]))

    # Most frequent angle.  Depending on the SciPy version ``.mode`` is a scalar or a
    # length-1 array, so coerce it to a plain float either way.
    dominant = mode(np.round(angles)).mode
    most_frequent_angle = float(np.atleast_1d(dominant).ravel()[0])

    # Fold into [-45, 45] to minimise rotation and keep the content upright.
    # arctan2 spans (-180, 180], so a single +/-90 step is not enough: e.g. a
    # horizontal segment reported right-to-left has angle 180 and must map to 0.
    while most_frequent_angle < -45:
        most_frequent_angle += 90
    while most_frequent_angle > 45:
        most_frequent_angle -= 90

    # Rotate the original image by the most frequent angle to correct its orientation
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, most_frequent_angle, 1.0)
    corrected_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # Optional border crop after rotation (disabled); a zero border would slice to nothing
    remove_border_final = False
    if remove_border_final and border_size > 0:
        cropped_rotated_image = corrected_image[border_size:-border_size, border_size:-border_size]
    else:
        cropped_rotated_image = corrected_image

    # Save the corrected image
    out_file = img_file + "_rotated.jpg"
    cv2.imwrite(out_file, cropped_rotated_image)

    return out_file


def pad_resize_image_file(img_file, relaxed_resize=False):
    """ Resize (and possibly pad) an image file, writing the result beside it.

    :param img_file: path of the image to process
    :param relaxed_resize: when True, only shrink via ``resize_image`` with a
        2048-pixel limit and use the ``_resized.png`` suffix; otherwise apply
        ``pad_resize_image`` and use the ``_pad_resized.png`` suffix
    :return: path of the written file, or ``img_file`` itself when the helper
        reports that no change was needed
    """
    import cv2

    loaded = file_to_cv2(img_file)
    if relaxed_resize:
        suffix = "_resized.png"
        adjusted = resize_image(loaded, return_none_if_no_change=True, max_dimension=2048)
    else:
        suffix = "_pad_resized.png"
        adjusted = pad_resize_image(loaded, return_none_if_no_change=True)

    # None from the helper means the image already satisfied the constraint
    if adjusted is None:
        return img_file

    target = img_file + suffix
    cv2.imwrite(target, adjusted)
    return target


def resize_image(image, return_none_if_no_change=True, max_dimension=2048):
    """ Shrink an image so its longer side is at most ``max_dimension`` pixels.

    The aspect ratio is preserved.  Images already within the limit are never
    enlarged.

    :param image: array whose first two ``shape`` entries are height and width
    :param return_none_if_no_change: when the image is already within the limit,
        return ``None`` if True, else return ``image`` itself
    :param max_dimension: largest allowed height or width in pixels
    :return: the resized image, or ``None`` / the original image as described above
    """
    height, width = image.shape[:2]
    longest_side = max(height, width)

    # No resizing needed if the image is already within the desired dimensions
    if longest_side <= max_dimension:
        return None if return_none_if_no_change else image

    # Imported only on the path that needs it, so the no-op case works without OpenCV
    import cv2

    scale_factor = max_dimension / longest_side
    # Truncation can reach 0 for extreme aspect ratios (e.g. 2 x 5000 pixels), which
    # cv2.resize rejects, so keep every side at least one pixel long.
    new_dimensions = (max(1, int(width * scale_factor)), max(1, int(height * scale_factor)))

    return cv2.resize(image, new_dimensions, interpolation=cv2.INTER_AREA)


def pad_resize_image(image, return_none_if_no_change=False, max_dimension=1024):
    """ Fit an image to a ``max_dimension`` x ``max_dimension`` canvas.

    Behaviour by input size (width, height compared with ``max_dimension``):

    * exactly the target size: returned unchanged (or ``None``, see below)
    * smaller in both directions: centred on a black canvas of the target size
    * larger in both directions: scaled down, keeping the aspect ratio, so the
      longer side equals ``max_dimension``; no padding is added in this case
    * anything else: scaled, keeping the aspect ratio, to fit the target and then
      centred on a black canvas of the target size

    :param image: array whose first two ``shape`` entries are height and width
    :param return_none_if_no_change: when the image already has the target size,
        return ``None`` if True, else return ``image`` itself
    :param max_dimension: side length of the square target in pixels
    :return: the processed image, or ``None`` / the original image as described above
    """
    target_w = max_dimension
    target_h = max_dimension
    src_w, src_h = image.shape[1], image.shape[0]

    if src_w == target_w and src_h == target_h:
        return None if return_none_if_no_change else image

    # Imported only once real work is needed, so the no-op case works without OpenCV
    import cv2

    only_pad = src_w < target_w and src_h < target_h
    only_resize = src_w > target_w and src_h > target_h

    if not only_pad:
        aspect_ratio_original = src_w / src_h
        aspect_ratio_final = target_w / target_h
        if aspect_ratio_original < aspect_ratio_final:
            # The image is taller than the target aspect ratio
            new_height = target_h
            new_width = int(target_h * aspect_ratio_original)
        else:
            # The image is wider than the target aspect ratio
            new_width = target_w
            new_height = int(target_w / aspect_ratio_original)
        # Truncation can reach 0 for extreme aspect ratios, which cv2.resize rejects
        new_width = max(1, new_width)
        new_height = max(1, new_height)
        image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
        src_w, src_h = new_width, new_height

    if not only_resize:
        # Split the gap so that an odd number of missing pixels still adds up to
        # the full target size (the extra pixel goes to the right / bottom).
        pad_left = (target_w - src_w) // 2
        pad_right = target_w - src_w - pad_left
        pad_top = (target_h - src_h) // 2
        pad_bottom = target_h - src_h - pad_top
        image = cv2.copyMakeBorder(image, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT,
                                   value=[0, 0, 0])

    return image


def fix_image_file(file, do_align=False, do_rotate=False, do_pad=False, relaxed_resize=False):
    """ Run the requested clean-up steps on an image file and return the resulting path.

    Nothing is done unless ``have_cv2`` is true.  Steps run in this order, each
    one working on the output of the previous one:

    * ``do_align``: ``align_image``
    * ``do_rotate``: ``correct_rotation``
    * ``do_pad`` or ``relaxed_resize``: ``pad_resize_image_file``

    :param file: path of the image to process
    :return: path of the final image (``file`` itself when no step changed it)
    """
    if not have_cv2:
        return file

    # align/rotate results are only adopted when they point at an existing file
    for enabled, step in ((do_align, align_image), (do_rotate, correct_rotation)):
        if not enabled:
            continue
        produced = step(file)
        if produced is not None and os.path.isfile(produced):
            file = produced

    if do_pad or relaxed_resize:
        file = pad_resize_image_file(file, relaxed_resize=relaxed_resize)
    return file


def get_image_types():
    """ List the file extensions Pillow can open, without the leading dot.

    :return: extensions sorted by their dotted form, or an empty list when
        ``have_pillow`` is false
    """
    if not have_pillow:
        return []

    from PIL import Image
    # keep only extensions whose format has a registered opener
    readable = {ext for ext, fmt in Image.registered_extensions().items() if fmt in Image.OPEN}
    return [ext[1:] if ext.startswith('.') else ext for ext in sorted(readable)]


def get_image_file(image_file, image_control, document_choice, base_model=None, images_num_max=None,
                   image_resolution=None, image_format=None,
                   convert=False,
                   str_bytes=True):
    """ Collect the image inputs to use, optionally converted, capped to a maximum count.

    Source priority: ``image_control`` if not None, else ``image_file`` if not None,
    else the entries of ``document_choice`` ending in an extension reported by
    ``get_image_types``.  A single value is treated as a one-item list and falsy
    entries are dropped.

    With ``convert`` set, entries that are existing files are passed through
    ``img_to_base64``; other strings are kept as they are; anything else is dropped.

    Cap on the number of returned entries:

    * with ``base_model`` and ``images_num_max == -1``: value from
      ``images_num_max_dict`` (default 1)
    * with ``base_model`` and ``images_num_max`` None: same lookup, falling back to 1
    * still None: no cap
    * values of -1 or below are mapped to ``-images_num_max - 1``

    :return: list of image entries, at most the cap in length
    """
    if image_control is not None:
        candidates = image_control
    elif image_file is not None:
        candidates = image_file
    else:
        suffixes = tuple('.' + ext for ext in get_image_types())
        candidates = [doc for doc in document_choice if doc.endswith(suffixes)] if document_choice else []

    if not isinstance(candidates, list):
        candidates = [candidates]

    kept = []
    for entry in candidates:
        if convert:
            if entry and os.path.isfile(entry):
                from vision.utils_vision import img_to_base64
                entry = img_to_base64(entry, str_bytes=str_bytes, resolution=image_resolution,
                                      output_format=image_format)
            elif not isinstance(entry, str):
                # neither a file nor a string payload: nothing usable
                entry = None
            # a non-file string is assumed to already be the encoded payload
        if entry:
            kept.append(entry)

    if base_model:
        if images_num_max == -1:
            images_num_max = images_num_max_dict.get(base_model, 1)
        if images_num_max is None:
            images_num_max = images_num_max_dict.get(base_model, 1) or 1
    if images_num_max is None:
        images_num_max = len(kept)
    if images_num_max <= -1:
        images_num_max = -images_num_max - 1
    return kept[:images_num_max]