Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,43 +1,43 @@
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import fitz # PyMuPDF
|
4 |
-
import spaces
|
5 |
from ultralytics import YOLOv10
|
6 |
|
7 |
# Load the trained model
|
8 |
-
|
9 |
model = YOLOv10("best.pt")
|
10 |
|
11 |
-
|
12 |
# Define the class indices for figures and tables
|
13 |
figure_class_index = 3 # class index for figures
|
14 |
table_class_index = 4 # class index for tables
|
15 |
|
16 |
# Function to perform inference on an image and return bounding boxes for figures and tables
|
17 |
-
|
18 |
def infer_image_and_get_boxes(image, confidence_threshold=0.6):
    """Detect figures and tables in *image* and return their bounding boxes.

    Runs the module-level ``model`` on the image and keeps only detections
    whose class is ``figure_class_index`` or ``table_class_index`` and whose
    confidence is strictly greater than ``confidence_threshold``.

    Returns a list of ``(x1, y1, x2, y2)`` tuples of ints, taken from each
    kept detection's ``xyxy`` entry.
    """
    detections = model.predict(image)
    wanted_classes = {figure_class_index, table_class_index}

    kept = []
    for result in detections:
        for box in result.boxes:
            # Class is checked first so confidence is only read for
            # figure/table detections.
            if int(box.cls[0]) not in wanted_classes:
                continue
            if not (box.conf[0] > confidence_threshold):
                continue
            corners = box.xyxy[0]
            kept.append(
                (int(corners[0]), int(corners[1]), int(corners[2]), int(corners[3]))
            )
    return kept
|
26 |
|
27 |
# Function to crop images from the boxes
|
28 |
-
|
29 |
def crop_images_from_boxes(image, boxes, scale_factor):
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
35 |
|
36 |
@spaces.GPU
|
37 |
def process_pdf(pdf_file):
|
38 |
# Open the PDF file
|
39 |
doc = fitz.open(pdf_file)
|
40 |
-
|
|
|
41 |
|
42 |
# Set the DPI for inference and high resolution for cropping
|
43 |
low_dpi = 50
|
@@ -62,16 +62,20 @@ def process_pdf(pdf_file):
|
|
62 |
high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
|
63 |
|
64 |
# Crop images at high DPI
|
65 |
-
|
66 |
-
|
|
|
67 |
|
68 |
-
return
|
69 |
|
70 |
# Create Gradio interface
|
71 |
iface = gr.Interface(
|
72 |
fn=process_pdf,
|
73 |
inputs=gr.File(label="Upload a PDF"),
|
74 |
-
outputs=
|
|
|
|
|
|
|
75 |
title="Fast document layout analysis based on YOLOv10",
|
76 |
description="Upload a PDF file to get cropped figures and tables from each page."
|
77 |
)
|
|
|
1 |
import gradio as gr
|
2 |
import numpy as np
|
3 |
import fitz # PyMuPDF
|
|
|
4 |
from ultralytics import YOLOv10
|
5 |
|
6 |
# Load the trained model
|
|
|
7 |
model = YOLOv10("best.pt")
|
8 |
|
|
|
9 |
# Define the class indices for figures and tables
|
10 |
figure_class_index = 3 # class index for figures
|
11 |
table_class_index = 4 # class index for tables
|
12 |
|
13 |
# Function to perform inference on an image and return bounding boxes for figures and tables
|
|
|
14 |
def infer_image_and_get_boxes(image, confidence_threshold=0.6):
    """Detect figures and tables in *image* and return labelled bounding boxes.

    Runs the module-level ``model`` on the image and keeps only detections
    whose class is ``figure_class_index`` or ``table_class_index`` and whose
    confidence is strictly greater than ``confidence_threshold``.

    Returns a list of ``(x1, y1, x2, y2, cls)`` tuples of ints: the four
    values of the detection's ``xyxy`` entry followed by its class index.
    """
    detections = model.predict(image)
    wanted_classes = {figure_class_index, table_class_index}

    kept = []
    for result in detections:
        for box in result.boxes:
            # Class is checked first so confidence is only read for
            # figure/table detections.
            if int(box.cls[0]) not in wanted_classes:
                continue
            if not (box.conf[0] > confidence_threshold):
                continue
            corners = box.xyxy[0]
            kept.append(
                (
                    int(corners[0]),
                    int(corners[1]),
                    int(corners[2]),
                    int(corners[3]),
                    int(box.cls[0]),
                )
            )
    return kept
|
22 |
|
23 |
# Function to crop images from the boxes
|
|
|
24 |
def crop_images_from_boxes(image, boxes, scale_factor, *, figure_class=None, table_class=None):
    """Crop figure and table regions out of *image*.

    Args:
        image: Array indexed as ``image[rows, cols, ...]`` (the crops are
            taken with 2-D slicing on the first two axes).
        boxes: Iterable of ``(x1, y1, x2, y2, cls)`` tuples. Coordinates are
            multiplied by ``scale_factor`` and truncated to ints before
            slicing.
        scale_factor: Multiplier that maps box coordinates onto *image*.
        figure_class: Class index treated as a figure. Defaults to the
            module-level ``figure_class_index``.
        table_class: Class index treated as a table. Defaults to the
            module-level ``table_class_index``.

    Returns:
        ``(figures, tables)`` - two lists of cropped arrays, in the order the
        boxes were given. Boxes of any other class are ignored, and boxes
        that yield an empty crop are skipped.
    """
    if figure_class is None:
        figure_class = figure_class_index
    if table_class is None:
        table_class = table_class_index

    figures = []
    tables = []
    for (x1, y1, x2, y2, cls) in boxes:
        # Clamp at 0: a negative slice bound would be interpreted relative to
        # the end of the axis and silently produce a wrong or empty crop.
        # Bounds past the far edge need no clamping; slicing truncates them.
        top = max(0, int(y1 * scale_factor))
        bottom = max(0, int(y2 * scale_factor))
        left = max(0, int(x1 * scale_factor))
        right = max(0, int(x2 * scale_factor))

        cropped_img = image[top:bottom, left:right]
        if cropped_img.size == 0:
            # Degenerate box (zero width/height or fully outside the image):
            # there is nothing to display, so do not emit an empty image.
            continue

        if cls == figure_class:
            figures.append(cropped_img)
        elif cls == table_class:
            tables.append(cropped_img)
    return figures, tables
|
34 |
|
35 |
@spaces.GPU
|
36 |
def process_pdf(pdf_file):
|
37 |
# Open the PDF file
|
38 |
doc = fitz.open(pdf_file)
|
39 |
+
all_figures = []
|
40 |
+
all_tables = []
|
41 |
|
42 |
# Set the DPI for inference and high resolution for cropping
|
43 |
low_dpi = 50
|
|
|
62 |
high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
|
63 |
|
64 |
# Crop images at high DPI
|
65 |
+
figures, tables = crop_images_from_boxes(high_res_img, boxes, scale_factor)
|
66 |
+
all_figures.extend(figures)
|
67 |
+
all_tables.extend(tables)
|
68 |
|
69 |
+
return all_figures, all_tables
|
70 |
|
71 |
# Create Gradio interface
|
72 |
iface = gr.Interface(
|
73 |
fn=process_pdf,
|
74 |
inputs=gr.File(label="Upload a PDF"),
|
75 |
+
outputs=[
|
76 |
+
gr.Gallery(label="Cropped Figures from PDF Pages"),
|
77 |
+
gr.Gallery(label="Cropped Tables from PDF Pages")
|
78 |
+
],
|
79 |
title="Fast document layout analysis based on YOLOv10",
|
80 |
description="Upload a PDF file to get cropped figures and tables from each page."
|
81 |
)
|