Spaces:

zliang
/

fastpaperlayout

Sleeping

App Files Files Community

zliang committed on May 23

Commit

c65777e

•

1 Parent(s): ec2e6e8

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -24

app.py CHANGED Viewed

@@ -1,13 +1,13 @@
 import gradio as gr
 from ultralytics import YOLO
 import cv2
 import numpy as np
 import fitz  # PyMuPDF
 from PIL import Image
-import spaces
 # Load the trained model
-model_path = 'best.pt'  # Replace with the path to your trained .pt file
 model = YOLO(model_path)
 # Define the class indices for figures and tables
@@ -16,28 +16,20 @@ table_class_index = 4   # class index for tables
 # Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
-    # Perform inference
     results = model(image)
-    boxes = []
-    # Extract results
-    for result in results:
-        for box in result.boxes:
-            cls = int(box.cls[0])
-            confidence = box.conf[0]
-            if (cls == figure_class_index or cls == table_class_index) and confidence > confidence_threshold:
-                x1, y1, x2, y2 = map(int, box.xyxy[0])
-                boxes.append((x1, y1, x2, y2))
     return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
-    cropped_images = []
-    for box in boxes:
-        x1, y1, x2, y2 = [int(coord * scale_factor) for coord in box]
-        cropped_image = image[y1:y2, x1:x2]
-        cropped_images.append(cropped_image)
     return cropped_images
 @spaces.GPU
@@ -49,7 +41,7 @@ def process_pdf(pdf_file):
     # Set the DPI for inference and high resolution for cropping
     low_dpi = 50
     high_dpi = 300
     # Calculate the scaling factor
     scale_factor = high_dpi / low_dpi
@@ -59,8 +51,7 @@ def process_pdf(pdf_file):
         # Perform inference at low DPI
         low_res_pix = page.get_pixmap(dpi=low_dpi)
-        low_res_img = Image.frombytes("RGB", [low_res_pix.width, low_res_pix.height], low_res_pix.samples)
-        low_res_img = np.array(low_res_img)
         # Get bounding boxes from low DPI image
         boxes = infer_image_and_get_boxes(low_res_img)
@@ -68,8 +59,7 @@ def process_pdf(pdf_file):
         if boxes:
             # Load high DPI image for cropping only if boxes are found
             high_res_pix = page.get_pixmap(dpi=high_dpi)
-            high_res_img = Image.frombytes("RGB", [high_res_pix.width, high_res_pix.height], high_res_pix.samples)
-            high_res_img = np.array(high_res_img)
             # Crop images at high DPI
             cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)

 import gradio as gr
 from ultralytics import YOLO
 import cv2
+import spaces
 import numpy as np
 import fitz  # PyMuPDF
 from PIL import Image
 # Load the trained model
+model_path = 'runs/detect/train7/weights/best.pt'  # Replace with the path to your trained .pt file
 model = YOLO(model_path)
 # Define the class indices for figures and tables
 # Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     """Run the detector on ``image`` and collect figure/table boxes.

     A detection is kept when its class index equals ``figure_class_index``
     or ``table_class_index`` and its confidence is strictly greater than
     ``confidence_threshold``.

     Returns a list of ``(x1, y1, x2, y2)`` integer tuples read from each
     kept box's ``xyxy`` entry, in iteration order.
     """
     results = model(image)
     wanted_classes = (figure_class_index, table_class_index)
     boxes = []
     for result in results:
         for detection in result.boxes:
             # Guard clauses: wrong class first, then low confidence.
             if int(detection.cls[0]) not in wanted_classes:
                 continue
             if not detection.conf[0] > confidence_threshold:
                 continue
             corners = detection.xyxy[0]
             boxes.append(
                 (int(corners[0]), int(corners[1]), int(corners[2]), int(corners[3]))
             )
     return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     """Cut one sub-image out of ``image`` for every box in ``boxes``.

     Args:
         image: Array indexed as ``image[rows, cols, ...]``.
         boxes: Iterable of ``(x1, y1, x2, y2)`` tuples.
         scale_factor: Multiplier applied to every coordinate before it is
             truncated to an ``int`` and used as a slice bound.

     Returns:
         A list with exactly one crop per input box, in the same order.
         A box that collapses to zero width or height yields an empty
         array rather than being dropped.
     """
     cropped_images = []
     for x1, y1, x2, y2 in boxes:
         # Clamp at 0: a negative slice bound would count from the far edge
         # of the array and silently select the wrong region.
         left, top, right, bottom = (
             max(0, int(coord * scale_factor)) for coord in (x1, y1, x2, y2)
         )
         # Bounds past the array edge need no clamping; slicing truncates them.
         cropped_images.append(image[top:bottom, left:right])
     return cropped_images
 @spaces.GPU
     # Set the DPI for inference and high resolution for cropping
     low_dpi = 50
     high_dpi = 300
     # Calculate the scaling factor
     scale_factor = high_dpi / low_dpi
         # Perform inference at low DPI
         low_res_pix = page.get_pixmap(dpi=low_dpi)
+        low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
         # Get bounding boxes from low DPI image
         boxes = infer_image_and_get_boxes(low_res_img)
         if boxes:
             # Load high DPI image for cropping only if boxes are found
             high_res_pix = page.get_pixmap(dpi=high_dpi)
+            high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
             # Crop images at high DPI
             cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)