Spaces:

ad4r5hgs
/

opencv-doc-scanner

Sleeping

App Files Files Community

opencv-doc-scanner / app.py

ad4r5hgs

Upload 2 files

f5a8ceb verified 7 months ago

raw

history blame contribute delete

4.52 kB

	import gradio as gr
	import cv2
	import numpy as np

	# Function to order points in a consistent manner
	def order_points(pts):
	    """Return four corner points ordered TL, TR, BR, BL.

	    Args:
	        pts: Sequence of ``[x, y]`` points (at least one point; normally
	            the four corners of a quadrilateral, in any order).

	    Returns:
	        A list of four ``[x, y]`` integer pairs in the order top-left,
	        top-right, bottom-right, bottom-left. Coordinates are truncated
	        to integers.
	    """
	    pts = np.asarray(pts)
	    ordered = np.zeros((4, 2), dtype='float32')

	    # x + y is smallest at the top-left corner and largest at the
	    # bottom-right corner.
	    coord_sum = pts.sum(axis=1)
	    ordered[0] = pts[np.argmin(coord_sum)]
	    ordered[2] = pts[np.argmax(coord_sum)]

	    # y - x is smallest at the top-right corner and largest at the
	    # bottom-left corner.
	    coord_diff = np.diff(pts, axis=1)
	    ordered[1] = pts[np.argmin(coord_diff)]
	    ordered[3] = pts[np.argmax(coord_diff)]

	    return ordered.astype('int').tolist()

	# Function to find the destination points for perspective transform
	def find_dest(pts):
	    """Compute the destination rectangle for a perspective transform.

	    Args:
	        pts: Four ``[x, y]`` corners ordered top-left, top-right,
	            bottom-right, bottom-left.

	    Returns:
	        ``[[0, 0], [w, 0], [w, h], [0, h]]`` where ``w`` is the longer of
	        the two horizontal edges and ``h`` the longer of the two vertical
	        edges, each truncated to an integer.
	    """
	    (tl, tr, br, bl) = pts

	    def _edge_length(p, q):
	        # Euclidean distance between two corner points.
	        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

	    max_width = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))
	    max_height = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

	    # The rectangle is built directly in TL, TR, BR, BL order, so no
	    # further re-ordering of the corners is needed.
	    return [[0, 0], [max_width, 0], [max_width, max_height], [0, max_height]]

	def scan(img):
	    """Find a document in a BGR image and return a top-down view of it.

	    Args:
	        img: Colour image as a NumPy array in BGR channel order.

	    Returns:
	        The perspective-corrected document cut from the (possibly
	        downscaled) input, or the (possibly downscaled) input itself when
	        no four-cornered contour is found.
	    """
	    # Resize image if dimensions exceed limit
	    dim_limit = 1080
	    max_dim = max(img.shape)
	    if max_dim > dim_limit:
	        resize_scale = dim_limit / max_dim
	        img = cv2.resize(img, None, fx=resize_scale, fy=resize_scale)

	    orig_img = img.copy()

	    # Perform morphological closing
	    kernel = np.ones((5, 5), np.uint8)
	    img = cv2.morphologyEx(img, cv2.MORPH_CLOSE, kernel, iterations=3)

	    # Initialize mask for GrabCut
	    mask = np.zeros(img.shape[:2], np.uint8)
	    bgdModel = np.zeros((1, 65), np.float64)
	    fgdModel = np.zeros((1, 65), np.float64)
	    # NOTE(review): the GrabCut rect is (x, y, width, height), so this
	    # leaves a 20 px margin on the top/left only — confirm that is intended.
	    rect = (20, 20, img.shape[1] - 20, img.shape[0] - 20)
	    cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)
	    # Mask values 0 and 2 are zeroed out; every other pixel is kept.
	    mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')
	    img = img * mask2[:, :, np.newaxis]

	    # Convert image to grayscale and apply Gaussian blur
	    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    gray = cv2.GaussianBlur(gray, (11, 11), 0)

	    # Perform Canny edge detection, then dilate the edges to close gaps
	    canny = cv2.Canny(gray, 0, 200)
	    canny = cv2.dilate(canny, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))

	    # Find contours and keep the five largest as page candidates
	    contours, _ = cv2.findContours(canny, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
	    page = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

	    for c in page:
	        epsilon = 0.02 * cv2.arcLength(c, True)
	        corners = cv2.approxPolyDP(c, epsilon, True)
	        if len(corners) == 4:
	            break
	    else:
	        # No candidate (or no contour at all) approximates to a
	        # quadrilateral: return the image unwarped rather than warping
	        # with whatever polygon the last candidate produced.
	        return orig_img

	    # Order the corners and find destination points for perspective transform
	    corners = sorted(np.concatenate(corners).tolist())
	    corners = order_points(corners)
	    destination_corners = find_dest(corners)

	    # Perform perspective transform to obtain top-down view; the output
	    # size is the bottom-right destination corner (width, height).
	    M = cv2.getPerspectiveTransform(np.float32(corners), np.float32(destination_corners))
	    final = cv2.warpPerspective(orig_img, M, (destination_corners[2][0], destination_corners[2][1]), flags=cv2.INTER_LINEAR)

	    return final

	def gradio_interface(input_image):
	    """Gradio callback: scan an uploaded RGB image and return the result.

	    Args:
	        input_image: RGB image as a NumPy array, or ``None`` when nothing
	            has been uploaded.

	    Returns:
	        The output of ``scan`` converted back to RGB, or ``None`` when
	        ``input_image`` is ``None``.
	    """
	    if input_image is None:
	        return None

	    # scan() works on BGR images, so convert on the way in and out.
	    img = cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR)
	    processed_img = scan(img)
	    return cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)

	# Build the web UI: explanatory Markdown followed by an image-in/image-out
	# interface wired to gradio_interface.
	with gr.Blocks() as demo:
	    gr.Markdown("# Document Scanner using OpenCV")
	    gr.Markdown("## [Reference: 4-point OpenCV getPerspective Transform Example](https://pyimagesearch.com/2014/08/25/4-point-opencv-getperspective-transform-example/)")
	    gr.Markdown("""
	### Image Processing Flow:
	- Resize the image if its dimensions exceed the limit.
	- Apply morphological transformations to enhance document boundaries.
	- Perform GrabCut for foreground extraction.
	- Convert the image to grayscale.
	- Apply Gaussian blur.
	- Perform Canny edge detection.
	- Dilate the edges to close gaps.
	- Find contours and identify the largest ones likely to be the document edges.
	- If a contour with four corners is found, transform the perspective to obtain a top-down view of the document.
	""")
	    image_input = gr.Image(type="numpy", label="Upload Image")
	    image_output = gr.Image(type="numpy", label="Processed Image")
	    gr.Interface(fn=gradio_interface, inputs=image_input, outputs=image_output, allow_flagging="never")

	# Start the Gradio server once the layout has been defined.
	demo.launch(debug=True)