Spaces:
Sleeping
Sleeping
import gradio as gr | |
import cv2 | |
import numpy as np | |
def order_points(pts):
    """Order corner points as top-left, top-right, bottom-right, bottom-left.

    The primary heuristic works in image coordinates (y grows downwards):
    the top-left point has the smallest ``x + y``, the bottom-right the
    largest ``x + y``, the top-right the smallest ``y - x`` and the
    bottom-left the largest ``y - x``.

    That heuristic can select the same input point for two different corners
    when the quadrilateral is rotated by roughly 45 degrees (e.g. a diamond),
    which silently drops a real corner.  When exactly four distinct points
    are given and the heuristic collapses, the points are instead ordered by
    their angle around the centroid, starting from the point with the
    smallest ``x + y``.

    Args:
        pts: Sequence of ``[x, y]`` pairs (list or array of shape ``(N, 2)``).

    Returns:
        A list of four ``[x, y]`` pairs of Python ints in the order
        top-left, top-right, bottom-right, bottom-left.
    """
    pts = np.array(pts)
    s = pts.sum(axis=1)
    diff = np.diff(pts, axis=1).ravel()  # y - x for every point

    # Indices in output order: tl, tr, br, bl.
    picks = [
        int(np.argmin(s)),
        int(np.argmin(diff)),
        int(np.argmax(s)),
        int(np.argmax(diff)),
    ]

    collapsed = len(set(picks)) < 4
    if collapsed and len(pts) == 4 and len(np.unique(pts, axis=0)) == 4:
        # Ascending arctan2 angle in image coordinates walks the points
        # clockwise on screen, i.e. tl -> tr -> br -> bl once the ring is
        # rotated to start at the top-left candidate.
        centre = pts.mean(axis=0)
        angles = np.arctan2(pts[:, 1] - centre[1], pts[:, 0] - centre[0])
        ring = np.argsort(angles, kind="stable")
        start = int(np.argmin(s[ring]))
        picks = np.roll(ring, -start).tolist()

    # Go through float32 first, as the original implementation did, so the
    # integer truncation behaves identically.
    rect = pts[picks].astype("float32")
    return rect.astype("int").tolist()
def find_dest(pts):
    """Compute the destination rectangle for a perspective transform.

    Args:
        pts: Four ``[x, y]`` corners in the order top-left, top-right,
            bottom-right, bottom-left.

    Returns:
        The corners of an axis-aligned rectangle anchored at the origin whose
        width and height are the (truncated) lengths of the longer of each
        pair of opposite edges, passed through ``order_points``.
    """
    top_left, top_right, bottom_right, bottom_left = pts

    def _edge_length(p, q):
        # Euclidean distance between two [x, y] points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    bottom_edge = _edge_length(bottom_right, bottom_left)
    top_edge = _edge_length(top_right, top_left)
    target_width = max(int(bottom_edge), int(top_edge))

    right_edge = _edge_length(top_right, bottom_right)
    left_edge = _edge_length(top_left, bottom_left)
    target_height = max(int(right_edge), int(left_edge))

    return order_points([
        [0, 0],
        [target_width, 0],
        [target_width, target_height],
        [0, target_height],
    ])
def scan(img, dim_limit=1080):
    """Locate a document in an image and return a top-down view of it.

    Pipeline: optional downscale, morphological closing, GrabCut foreground
    extraction, grayscale + Gaussian blur, Canny edges, dilation, contour
    search, and finally a four-point perspective warp.

    Args:
        img: 3-channel BGR image as a NumPy array (it is passed to
            ``cv2.grabCut`` and converted with ``cv2.COLOR_BGR2GRAY``).
        dim_limit: Largest allowed height/width in pixels; bigger images are
            scaled down proportionally before processing.

    Returns:
        The perspective-corrected document image.  The (possibly resized)
        input is returned unchanged when no contour is found or when the
        detected region would produce an empty output image.
    """
    # Only the spatial dimensions matter here, not the channel count.
    max_dim = max(img.shape[:2])
    if max_dim > dim_limit:
        resize_scale = dim_limit / max_dim
        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)
    orig_img = img.copy()

    # Repeated closing with a 5x5 kernel before segmentation.
    kernel = np.ones((5, 5), np.uint8)
    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)

    # GrabCut initialised from a rectangle; the model arrays are scratch
    # buffers that cv2.grabCut fills in.
    mask = np.zeros(img.shape[:2], np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    # NOTE(review): OpenCV rectangles are (x, y, width, height), so this one
    # starts 20 px in from the top/left but its size reaches the bottom/right
    # image border.  Kept as-is to preserve segmentation results -- confirm
    # whether a symmetric 20 px margin was intended.
    rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
    # Mask labels 0 and 2 are (probable) background; everything else is kept.
    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
    img = img * mask2[:, :, np.newaxis]

    # Edge map of the segmented foreground.
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (11, 11), 0)
    canny = cv2.Canny(gray, 0, 200)
    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))

    # Consider the five largest contours as document candidates.
    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]
    if not page:
        return orig_img

    # Take the first candidate whose polygon approximation has four vertices.
    # If none qualifies, the approximation of the last candidate is used and
    # order_points() reduces it to four extreme points.
    for c in page:
        epsilon = 0.02 * cv2.arcLength(c, True)
        corners = cv2.approxPolyDP(c, epsilon, True)
        if len(corners) == 4:
            break

    # Order the corners and find destination points for perspective transform.
    corners = sorted(np.concatenate(corners).tolist())
    corners = order_points(corners)
    destination_corners = find_dest(corners)

    # The bottom-right destination corner is the output size.  A degenerate
    # detection yields a zero-sized target, which cv2.warpPerspective rejects,
    # so fall back to the unwarped image instead of raising.
    out_width, out_height = destination_corners[2]
    if out_width < 1 or out_height < 1:
        return orig_img

    # Perform perspective transform to obtain top-down view.
    M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners))
    final = cv2.warpPerspective(orig_img, M, (out_width, out_height), flags=cv2.INTER_LINEAR)
    return final
def gradio_interface(input_image):
    """Gradio callback: run the scanner on an uploaded RGB image.

    Args:
        input_image: RGB image as a NumPy array, or ``None`` when nothing
            has been uploaded.

    Returns:
        The scanned result converted back to RGB, or ``None`` if there was
        no input.
    """
    if input_image is not None:
        # OpenCV works in BGR, so convert on the way in and on the way out.
        bgr_input = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
        bgr_result = scan(bgr_input)
        return cv2.cvtColor(bgr_result, cv2.COLOR_BGR2RGB)
    return None
# Build the demo page: a title, a link to the reference article, a summary of
# the processing steps, and the upload/result image pair wired to the scanner.
with gr.Blocks() as demo:
    gr.Markdown("# Document Scanner using OpenCV")
    gr.Markdown("## [Reference: 4-point OpenCV getPerspective Transform Example](https://pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/)")
    # The Markdown text below is kept flush-left, as it appears in the source.
    gr.Markdown("""
### Image Processing Flow:
- Resize the image if its dimensions exceed the limit.
- Apply morphological transformations to enhance document boundaries.
- Perform GrabCut for foreground extraction.
- Convert the image to grayscale.
- Apply Gaussian blur.
- Perform Canny edge detection.
- Dilate the edges to close gaps.
- Find contours and identify the largest ones likely to be the document edges.
- If a contour with four corners is found, transform the perspective to obtain a top-down view of the document.
""")
    # Both image components exchange NumPy arrays with the callback.
    image_input = gr.Image(type="numpy", label="Upload Image")
    image_output = gr.Image(type="numpy", label="Processed Image")
    # Connect the components to gradio_interface; flagging is turned off.
    gr.Interface(fn=gradio_interface, inputs=image_input, outputs=image_output, allow_flagging="never")
# Start the Gradio app with debug mode enabled.
demo.launch(debug=True)