zliang committed on
Commit
eb98323
1 Parent(s): cbe1985

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -16
app.py CHANGED
@@ -1,43 +1,43 @@
1
  import gradio as gr
2
  import numpy as np
3
  import fitz # PyMuPDF
4
- import spaces
5
  from ultralytics import YOLOv10
6
 
7
  # Load the trained model
8
-
9
  model = YOLOv10("best.pt")
10
 
11
-
12
  # Define the class indices for figures and tables
13
  figure_class_index = 3 # class index for figures
14
  table_class_index = 4 # class index for tables
15
 
16
  # Function to perform inference on an image and return bounding boxes for figures and tables
17
-
18
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
19
  results = model.predict(image)
20
  boxes = [
21
- (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
22
  for result in results for box in result.boxes
23
  if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
24
  ]
25
  return boxes
26
 
27
  # Function to crop images from the boxes
28
-
29
  def crop_images_from_boxes(image, boxes, scale_factor):
30
- cropped_images = [
31
- image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
32
- for (x1, y1, x2, y2) in boxes
33
- ]
34
- return cropped_images
 
 
 
 
35
 
36
  @spaces.GPU
37
  def process_pdf(pdf_file):
38
  # Open the PDF file
39
  doc = fitz.open(pdf_file)
40
- all_cropped_images = []
 
41
 
42
  # Set the DPI for inference and high resolution for cropping
43
  low_dpi = 50
@@ -62,16 +62,20 @@ def process_pdf(pdf_file):
62
  high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
63
 
64
  # Crop images at high DPI
65
- cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
66
- all_cropped_images.extend(cropped_imgs)
 
67
 
68
- return all_cropped_images
69
 
70
  # Create Gradio interface
71
  iface = gr.Interface(
72
  fn=process_pdf,
73
  inputs=gr.File(label="Upload a PDF"),
74
- outputs=gr.Gallery(label="Cropped Figures and Tables from PDF Pages"),
 
 
 
75
  title="Fast document layout analysis based on YOLOv10",
76
  description="Upload a PDF file to get cropped figures and tables from each page."
77
  )
 
1
  import gradio as gr
2
  import numpy as np
3
  import fitz # PyMuPDF
 
4
  from ultralytics import YOLOv10
5
 
6
  # Load the trained model
 
7
  model = YOLOv10("best.pt")
8
 
 
9
  # Define the class indices for figures and tables
10
  figure_class_index = 3 # class index for figures
11
  table_class_index = 4 # class index for tables
12
 
13
  # Function to perform inference on an image and return bounding boxes for figures and tables
 
14
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
15
  results = model.predict(image)
16
  boxes = [
17
+ (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]), int(box.cls[0]))
18
  for result in results for box in result.boxes
19
  if int(box.cls[0]) in {figure_class_index, table_class_index} and box.conf[0] > confidence_threshold
20
  ]
21
  return boxes
22
 
23
  # Function to crop images from the boxes
 
24
  def crop_images_from_boxes(image, boxes, scale_factor):
25
+ figures = []
26
+ tables = []
27
+ for (x1, y1, x2, y2, cls) in boxes:
28
+ cropped_img = image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
29
+ if cls == figure_class_index:
30
+ figures.append(cropped_img)
31
+ elif cls == table_class_index:
32
+ tables.append(cropped_img)
33
+ return figures, tables
34
 
35
  @spaces.GPU
36
  def process_pdf(pdf_file):
37
  # Open the PDF file
38
  doc = fitz.open(pdf_file)
39
+ all_figures = []
40
+ all_tables = []
41
 
42
  # Set the DPI for inference and high resolution for cropping
43
  low_dpi = 50
 
62
  high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
63
 
64
  # Crop images at high DPI
65
+ figures, tables = crop_images_from_boxes(high_res_img, boxes, scale_factor)
66
+ all_figures.extend(figures)
67
+ all_tables.extend(tables)
68
 
69
+ return all_figures, all_tables
70
 
71
  # Create Gradio interface
72
  iface = gr.Interface(
73
  fn=process_pdf,
74
  inputs=gr.File(label="Upload a PDF"),
75
+ outputs=[
76
+ gr.Gallery(label="Cropped Figures from PDF Pages"),
77
+ gr.Gallery(label="Cropped Tables from PDF Pages")
78
+ ],
79
  title="Fast document layout analysis based on YOLOv10",
80
  description="Upload a PDF file to get cropped figures and tables from each page."
81
  )