Spaces:

Abijith
/

Table-Detection-and-Recognition-using-DETR

Runtime error

App Files Files Community

Abijith committed on Oct 2, 2023

Commit

38744b1

•

1 Parent(s): fc047c9

Upload 5 files

Browse files

Files changed (5) hide show

codes/data_extraction.py +62 -0
codes/image_processing.py +55 -0
codes/table_detection.py +28 -0
codes/table_preprocessing.py +35 -0
codes/table_recognition.py +33 -0

codes/data_extraction.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import os
+import re
+import pytesseract
+from pytesseract import Output
+from datatypes.datatypes import Row, Cell
+from codes.image_processing import ImageProcessor
+from datatypes.config import Config
+class TextDataExtraction():
+    def __init__(self):
+        pass
+    def clean_ocr_data(self, value):
+        transf = ''.join(e for e in value if e==' 'or e=='.' or e.isalnum())
+        transf.strip()
+        return transf
+    def pytess(self, cell_pil_img):
+        return ' '.join(pytesseract.image_to_data(cell_pil_img, output_type=Output.DICT, config='-c tessedit_char_blacklist=œ˜â€œï¬â™Ã©œ¢!|”?«“¥ --psm 6 preserve_interword_spaces')['text']).strip()
+    def cell_data_extraction(self, image,  table_data):
+        for table in table_data.tables:
+            tableimg_processor = ImageProcessor()
+            table_bbox = table.detection_box
+            table_image = image.crop(table_bbox)
+            table_image = tableimg_processor.image_padding(table_image, padd=Config['table_padd'])
+            for row_idx, table_row in enumerate(table.ordered_recognitiondata[0].recognized_row):
+                row_obj = Row([])
+                xmin_row, ymin_row, xmax_row, ymax_row, _, _ = table_row
+                row_image = table_image.crop((xmin_row,ymin_row,xmax_row,ymax_row))
+                row_width, row_height = row_image.size
+                row_obj.rowindex = row_idx
+                # Cell bounding box creation
+                xa, ya, xb, yb = 0, 0, 0, row_height
+                for indx, table_column in enumerate(table.ordered_recognitiondata[0].recognized_column):
+                    cell_obj = Cell()
+                    xmin_col, _, xmax_col, _,_,_ = table_column
+                    xmin_col, xmax_col = xmin_col -Config['table_padd'], xmax_col - Config['table_padd']
+                    xa = xmin_col
+                    xb = xmax_col
+                    if indx == 0:
+                        xa = 0
+                    if indx == len(table.ordered_recognitiondata[0].recognized_column)-1:
+                        xb = row_width
+                    cell_img = row_image.crop((xa, ya, xb, yb))
+                    xa, ya, xb, yb = xa, ya, xb, yb
+                    cell_value = self.pytess(cell_img)
+                    transformed_cell_value = self.clean_ocr_data(cell_value)
+                    cell_obj.cellindex = indx
+                    cell_obj.value = transformed_cell_value
+                    row_obj.extracted_cells.append(cell_obj)
+                table.extracted_rows.append(row_obj)
+        return table_data

codes/image_processing.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import os
+import cv2
+import numpy as np
+from PIL import Image
+# Some image-processing techniques to improve the images.
+class ImageProcessor():
+    def __init__(self):
+        pass
+    def PIL_to_cv2(self, pil_img):
+        return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
+    def cv2_to_PIL(self, cv_img):
+        return Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))
+    def image_padding(self, image, padd):
+        '''
+        Image boarder padding to avoid table image loss
+        '''
+        width, height = image.size
+        new_width = width +(2*padd)
+        new_height = height + (2*padd)
+        color = (255, 255, 255)
+        result = Image.new(image.mode, (new_width, new_height), color)
+        result.paste(image, (padd, padd))
+        return result
+    def sharpen_image(self, pil_img):
+        img = self.PIL_to_cv2(pil_img)
+        '''
+        Image sharpening kernal
+        '''
+        sharpen_kernel = np.array([[-1, -1, -1],
+                                [-1,  9, -1],
+                                [-1, -1, -1]])
+        sharpen = cv2.filter2D(img, -1, sharpen_kernel)
+        pil_img = self.cv2_to_PIL(sharpen)
+        return pil_img
+    def binarizeBlur_image(self, pil_img):
+        image = self.PIL_to_cv2(pil_img)
+        thresh = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY_INV)[1]
+        result = cv2.GaussianBlur(thresh, (3,3), 0)
+        result = 255 - result
+        return self.cv2_to_PIL(result)
+    def whole_image_processing(self, pil_img):
+        sharpen_img = self.sharpen_image(pil_img)
+        binary_img = self.binarizeBlur_image(sharpen_img)
+        return binary_img

codes/table_detection.py ADDED Viewed

	@@ -0,0 +1,28 @@

+import os
+from datatypes.datatypes import ImageData
+from datatypes.datatypes import TableDetectionData
+class TableDetection():
+    def __init__(self, feature_extractor, detection_model, threshold):
+        self.feature_extractor = feature_extractor
+        self.detection_model = detection_model
+        self.threshold = threshold
+    def table_detection_from_image(self, detection_image):
+        table_data_extraction = ImageData([])
+        image_width, image_height = detection_image.size
+        detection_encoding = self.feature_extractor(detection_image, return_tensors='pt')
+        detection_output = self.detection_model(**detection_encoding)
+        detection_results = self.feature_extractor.post_process_object_detection(detection_output, threshold=0.3, target_sizes=[(image_height, image_width)])
+        detection_results = detection_results[0]
+        # copying the detections
+        for score, label, bbox in zip((detection_results['scores']).tolist(), (detection_results['labels']).tolist(), (detection_results['boxes']).tolist()):
+            detection_table_results = TableDetectionData()
+            detection_table_results.detection_score = score
+            detection_table_results.detection_label = label
+            detection_table_results.detection_box = bbox
+            table_data_extraction.tables.append(detection_table_results)
+        return table_data_extraction

codes/table_preprocessing.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import os
+from datatypes.datatypes import DetectionLabels, TableRecognitionOrdered
+class TablePreprocessor():
+    def __init__(self):
+        pass
+    def table_structure_sorting(self, table_data):
+        for table in table_data.tables:
+            recognized_row = []
+            recognized_column = []
+            recognized_ord_obj = TableRecognitionOrdered([])
+            # print(table.recognitiondata[0])
+            for score, label, box in zip(table.recognitiondata[0].scores, table.recognitiondata[0].labels, table.recognitiondata[0].boxes):
+                # print(score, label, box)
+                newbox = []
+                if label == DetectionLabels.table_row.value:
+                    newbox = box
+                    newbox.append(label)
+                    newbox.append(score)
+                    recognized_row.append(newbox)
+                if label == DetectionLabels.table_column.value:
+                    newbox = box
+                    newbox.append(label)
+                    newbox.append(score)
+                    recognized_column.append(newbox)
+            recognized_row.sort(key=lambda x:x[1])
+            recognized_column.sort(key=lambda x:x[0])
+            recognized_ord_obj.recognized_row = recognized_row
+            recognized_ord_obj.recognized_column = recognized_column
+            table.ordered_recognitiondata.append(recognized_ord_obj)
+        return table_data

codes/table_recognition.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import os
+from datatypes.datatypes import TableRecognitionData, TableDetectionData
+from codes.image_processing import ImageProcessor
+from datatypes.config import Config
+class TableRecognition:
+    def __init__(self, feature_extractor, recognition_model, threshold):
+        self.feature_extractor = feature_extractor
+        self.recognition_model = recognition_model
+        self.threshold = threshold
+    def table_recognition_from_detection(self, recognition_image, detection_results):
+        for table in detection_results.tables:
+            recognised_table_results = TableRecognitionData()
+            bbox = table.detection_box
+            detected_tbl = recognition_image.crop(bbox)
+            img_processor = ImageProcessor()
+            padded_table = img_processor.image_padding(image=detected_tbl, padd=Config['table_padd'])
+            width, height = padded_table.size
+            recognition_encoding = self.feature_extractor(padded_table, return_tensors='pt')
+            recognition_output = self.recognition_model(**recognition_encoding)
+            recognition_results = self.feature_extractor.post_process_object_detection(recognition_output, threshold=0.7, target_sizes=[(height, width)])
+            recognition_results = recognition_results[0]
+            recognised_table_results.scores = (recognition_results['scores'].tolist())
+            recognised_table_results.labels = (recognition_results['labels'].tolist())
+            recognised_table_results.boxes = (recognition_results['boxes'].tolist())
+            table.recognitiondata.append(recognised_table_results)
+        return detection_results