Spaces:

zliang
/

fastpaperlayout

Sleeping

App Files Community

zliang committed on May 23

Commit

ec2e6e8

•

1 Parent(s): 3cadd69

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -17

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Load the trained model
 import gradio as gr
 from ultralytics import YOLO
 import cv2
@@ -6,6 +5,7 @@ import numpy as np
 import fitz  # PyMuPDF
 from PIL import Image
 import spaces
 # Load the trained model
 model_path = 'best.pt'  # Replace with the path to your trained .pt file
 model = YOLO(model_path)
@@ -16,11 +16,8 @@ table_class_index = 4   # class index for tables
 # Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
-    # Convert the image from BGR to RGB
-    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
     # Perform inference
-    results = model(image_rgb)
     boxes = []
     # Extract results
@@ -55,7 +52,7 @@ def process_pdf(pdf_file):
     # Calculate the scaling factor
     scale_factor = high_dpi / low_dpi
     # Loop through each page
     for page_num in range(len(doc)):
         page = doc.load_page(page_num)
@@ -64,18 +61,19 @@ def process_pdf(pdf_file):
         low_res_pix = page.get_pixmap(dpi=low_dpi)
         low_res_img = Image.frombytes("RGB", [low_res_pix.width, low_res_pix.height], low_res_pix.samples)
         low_res_img = np.array(low_res_img)
         # Get bounding boxes from low DPI image
         boxes = infer_image_and_get_boxes(low_res_img)
-        # Load high DPI image for cropping
-        high_res_pix = page.get_pixmap(dpi=high_dpi)
-        high_res_img = Image.frombytes("RGB", [high_res_pix.width, high_res_pix.height], high_res_pix.samples)
-        high_res_img = np.array(high_res_img)
-        # Crop images at high DPI
-        cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
-        all_cropped_images.extend(cropped_imgs)
     return all_cropped_images
@@ -90,4 +88,3 @@ iface = gr.Interface(
 # Launch the app
 iface.launch()

 import gradio as gr
 from ultralytics import YOLO
 import cv2
 import fitz  # PyMuPDF
 from PIL import Image
 import spaces
 # Load the trained model
 model_path = 'best.pt'  # Replace with the path to your trained .pt file
 model = YOLO(model_path)
 # Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     # Perform inference
+    results = model(image)
     boxes = []
     # Extract results
     # Calculate the scaling factor
     scale_factor = high_dpi / low_dpi
     # Loop through each page
     for page_num in range(len(doc)):
         page = doc.load_page(page_num)
         low_res_pix = page.get_pixmap(dpi=low_dpi)
         low_res_img = Image.frombytes("RGB", [low_res_pix.width, low_res_pix.height], low_res_pix.samples)
         low_res_img = np.array(low_res_img)
         # Get bounding boxes from low DPI image
         boxes = infer_image_and_get_boxes(low_res_img)
+        if boxes:
+            # Load high DPI image for cropping only if boxes are found
+            high_res_pix = page.get_pixmap(dpi=high_dpi)
+            high_res_img = Image.frombytes("RGB", [high_res_pix.width, high_res_pix.height], high_res_pix.samples)
+            high_res_img = np.array(high_res_img)
+            # Crop images at high DPI
+            cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
+            all_cropped_images.extend(cropped_imgs)
     return all_cropped_images
 # Launch the app
 iface.launch()