Spaces:

zliang
/

fastpaperlayout

Sleeping

App Files Community

zliang committed on May 31

Commit

cff5fa2

•

1 Parent(s): 9b47e37

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -23

app.py CHANGED Viewed

@@ -11,18 +11,15 @@ model = YOLOv10("best.pt")
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
-# Function to perform inference on a batch of images and return bounding boxes for figures and tables
-def infer_images_and_get_boxes(images, confidence_threshold=0.6):
-    results = model.predict(images)
-    all_boxes = []
-    for result in results:
-        boxes = [
-            (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
-            for box in result.boxes
-            if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
-        ]
-        all_boxes.append(boxes)
-    return all_boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
@@ -48,17 +45,13 @@ def process_pdf(pdf_file):
     # Pre-cache all page pixmaps at low DPI
     low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
-    # Prepare a batch of low resolution images for inference
-    low_res_imgs = [
-        np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)
-        for pix in low_res_pixmaps
-    ]
-    # Run inference on the batch of low resolution images
-    all_boxes = infer_images_and_get_boxes(low_res_imgs)
-    # Loop through each page and corresponding boxes
-    for page_num, (low_res_img, boxes) in enumerate(zip(low_res_imgs, all_boxes)):
         if boxes:
             # Load high DPI image for cropping only if boxes are found
             high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)

 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
+# Function to perform inference on a single image and return bounding boxes for figures and tables
+def infer_image_and_get_boxes(image, confidence_threshold=0.6):
+    results = model.predict(image)
+    boxes = [
+        (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
+        for result in results for box in result.boxes
+        if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
+    ]
+    return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     # Pre-cache all page pixmaps at low DPI
     low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
+    # Loop through each page
+    for page_num, low_res_pix in enumerate(low_res_pixmaps):
+        low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
+        # Get bounding boxes from low DPI image
+        boxes = infer_image_and_get_boxes(low_res_img)
         if boxes:
             # Load high DPI image for cropping only if boxes are found
             high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)