Spaces:

zliang
/

fastpaperlayout

Sleeping

App Files Files Community

zliang committed on Jun 1

Commit

eb98323

•

1 Parent(s): cbe1985

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -16

app.py CHANGED Viewed

@@ -1,43 +1,43 @@
 import gradio as gr
 import numpy as np
 import fitz  # PyMuPDF
-import spaces
 from ultralytics import YOLOv10
 # Load the trained model
 # NOTE: this call sits at module level, so the weights are loaded on import; "best.pt" is a relative path.
 model = YOLOv10("best.pt")
 # Define the class indices for figures and tables
 # NOTE(review): 3 and 4 presumably follow the label order used when best.pt was trained -- confirm against the model's class names.
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
 # Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     # Runs the module-level `model` on `image` and returns a list of
     # (x1, y1, x2, y2) integer tuples: one per detection whose class is
     # figure_class_index or table_class_index and whose confidence is
     # strictly greater than `confidence_threshold`.
     results = model.predict(image)
     boxes = [
         # int() truncates each corner value read from box.xyxy[0].
         # NOTE(review): the coordinates are presumably pixels of the image given to predict();
         # process_pdf passes a scale_factor to the crop step -- confirm the two agree.
-        (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
         for result in results for box in result.boxes
         # A detection whose confidence equals the threshold exactly is rejected (">" not ">=").
         if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
     ]
     return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     # Returns one crop of `image` per entry of `boxes`, in the same order.
     # Each (x1, y1, x2, y2) corner is multiplied by `scale_factor` and
     # truncated with int() before slicing; the first axis is indexed by y
     # and the second by x.
     # NOTE(review): process_pdf passes a NumPy array here, so the crops are
     # presumably views into that array rather than copies -- confirm.
-    cropped_images = [
-        image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
-        for (x1, y1, x2, y2) in boxes
-    ]
-    return cropped_images
 @spaces.GPU
 def process_pdf(pdf_file):
     # Open the PDF file
     doc = fitz.open(pdf_file)
-    all_cropped_images = []
     # Set the DPI for inference and high resolution for cropping
     low_dpi = 50
@@ -62,16 +62,20 @@ def process_pdf(pdf_file):
             high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
             # Crop images at high DPI
-            cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
-            all_cropped_images.extend(cropped_imgs)
-    return all_cropped_images
 # Create Gradio interface
 # One file upload in, one gallery out: process_pdf returns a single list of crops.
 iface = gr.Interface(
     fn=process_pdf,
     inputs=gr.File(label="Upload a PDF"),
-    outputs=gr.Gallery(label="Cropped Figures and Tables from PDF Pages"),
     title="Fast document layout analysis based on YOLOv10",
     description="Upload a PDF file to get cropped figures and tables from each page."
 )

 import gradio as gr
 import numpy as np
 import fitz  # PyMuPDF
 from ultralytics import YOLOv10
 # Load the trained model
 # NOTE: this call sits at module level, so the weights are loaded on import; "best.pt" is a relative path.
 model = YOLOv10("best.pt")
 # Define the class indices for figures and tables
 # NOTE(review): 3 and 4 presumably follow the label order used when best.pt was trained -- confirm against the model's class names.
 figure_class_index = 3  # class index for figures
 table_class_index = 4   # class index for tables
 # Function to perform inference on an image and return bounding boxes for figures and tables
 def infer_image_and_get_boxes(image, confidence_threshold=0.6):
     # Runs the module-level `model` on `image` and returns a list of
     # (x1, y1, x2, y2, cls) integer tuples: one per detection whose class is
     # figure_class_index or table_class_index and whose confidence is
     # strictly greater than `confidence_threshold`.
     results = model.predict(image)
     boxes = [
         # int() truncates each corner value read from box.xyxy[0]; the fifth
         # element carries the class index so callers can tell figures from tables.
         # NOTE(review): the coordinates are presumably pixels of the image given to predict();
         # process_pdf passes a scale_factor to the crop step -- confirm the two agree.
+        (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]), int(box.cls[0]))
         for result in results for box in result.boxes
         # A detection whose confidence equals the threshold exactly is rejected (">" not ">=").
         if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
     ]
     return boxes
 # Function to crop images from the boxes
 def crop_images_from_boxes(image, boxes, scale_factor):
     # Crops every box out of `image` and separates the crops by class.
     # `boxes` holds (x1, y1, x2, y2, cls) tuples. Returns a (figures, tables)
     # pair of lists, each keeping the order in which its boxes appeared.
     # NOTE(review): process_pdf passes a NumPy array here, so the crops are
     # presumably views into that array rather than copies -- confirm.
+    figures = []
+    tables = []
+    for (x1, y1, x2, y2, cls) in boxes:
         # Corners are multiplied by `scale_factor` and truncated with int();
         # the first axis is indexed by y and the second by x.
+        cropped_img = image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
+        if cls == figure_class_index:
+            figures.append(cropped_img)
+        elif cls == table_class_index:
+            tables.append(cropped_img)
         # There is no else branch: a box with any other class value is dropped.
+    return figures, tables
 @spaces.GPU
 def process_pdf(pdf_file):
     # Open the PDF file
     doc = fitz.open(pdf_file)
+    all_figures = []
+    all_tables = []
     # Set the DPI for inference and high resolution for cropping
     low_dpi = 50
             high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
             # Crop images at high DPI
+            figures, tables = crop_images_from_boxes(high_res_img, boxes, scale_factor)
+            all_figures.extend(figures)
+            all_tables.extend(tables)
+    return all_figures, all_tables
 # Create Gradio interface
 # One file upload in, two galleries out. process_pdf returns (all_figures, all_tables),
 # and the galleries below are listed in that same order.
 iface = gr.Interface(
     fn=process_pdf,
     inputs=gr.File(label="Upload a PDF"),
+    outputs=[
+        gr.Gallery(label="Cropped Figures from PDF Pages"),
+        gr.Gallery(label="Cropped Tables from PDF Pages")
+    ],
     title="Fast document layout analysis based on YOLOv10",
     description="Upload a PDF file to get cropped figures and tables from each page."
 )