zliang committed on
Commit
cbe1985
1 Parent(s): 4da5a4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -12
app.py CHANGED
@@ -1,19 +1,22 @@
1
  import gradio as gr
2
  import numpy as np
3
- from pdf2image import convert_from_path
4
- from PIL import Image
5
- from ultralytics import YOLOv10
6
  import spaces
 
 
7
  # Load the trained model
 
8
  model = YOLOv10("best.pt")
9
 
 
10
  # Define the class indices for figures and tables
11
  figure_class_index = 3 # class index for figures
12
  table_class_index = 4 # class index for tables
13
 
14
  # Function to perform inference on an image and return bounding boxes for figures and tables
 
15
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
16
- results = model.predict(np.array(image))
17
  boxes = [
18
  (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
19
  for result in results for box in result.boxes
@@ -22,39 +25,46 @@ def infer_image_and_get_boxes(image, confidence_threshold=0.6):
22
  return boxes
23
 
24
  # Function to crop images from the boxes
 
25
  def crop_images_from_boxes(image, boxes, scale_factor):
26
  cropped_images = [
27
- image.crop((int(x1 * scale_factor), int(y1 * scale_factor), int(x2 * scale_factor), int(y2 * scale_factor)))
28
  for (x1, y1, x2, y2) in boxes
29
  ]
30
  return cropped_images
31
 
32
  @spaces.GPU
33
  def process_pdf(pdf_file):
 
 
34
  all_cropped_images = []
35
 
36
  # Set the DPI for inference and high resolution for cropping
37
  low_dpi = 50
38
  high_dpi = 300
39
 
40
- # Convert PDF pages to images at low DPI
41
- low_res_images = convert_from_path(pdf_file.name, dpi=low_dpi)
42
-
43
  # Calculate the scaling factor
44
  scale_factor = high_dpi / low_dpi
45
 
46
- for page_num, low_res_img in enumerate(low_res_images):
 
 
 
 
 
 
47
  # Get bounding boxes from low DPI image
48
  boxes = infer_image_and_get_boxes(low_res_img)
49
 
50
  if boxes:
51
- # Convert the specific page to high DPI only if boxes are found
52
- high_res_img = convert_from_path(pdf_file.name, dpi=high_dpi, first_page=page_num+1, last_page=page_num+1)[0]
 
53
 
54
  # Crop images at high DPI
55
  cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
56
  all_cropped_images.extend(cropped_imgs)
57
-
58
  return all_cropped_images
59
 
60
  # Create Gradio interface
@@ -69,3 +79,4 @@ iface = gr.Interface(
69
  # Launch the app
70
  iface.launch()
71
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
+ import fitz # PyMuPDF
 
 
4
  import spaces
5
+ from ultralytics import YOLOv10
6
+
7
  # Load the trained model
8
+
9
  # Instantiated once at module import; infer_image_and_get_boxes calls
  # model.predict on this same object for every image it is given.
  model = YOLOv10("best.pt")
10
 
11
+
12
  # Class indices of the two categories of interest (figures and tables).
  # NOTE(review): the numeric values depend on how best.pt was trained —
  # confirm them against the model's label map before changing anything.
13
  figure_class_index = 3 # class index for figures
14
  table_class_index = 4 # class index for tables
15
 
16
  # Function to perform inference on an image and return bounding boxes for figures and tables
17
+
18
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
19
+ results = model.predict(image)
20
  boxes = [
21
  (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
22
  for result in results for box in result.boxes
 
25
  return boxes
26
 
27
  # Function to crop images from the boxes
28
+
29
  def crop_images_from_boxes(image, boxes, scale_factor):
30
  cropped_images = [
31
+ image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
32
  for (x1, y1, x2, y2) in boxes
33
  ]
34
  return cropped_images
35
 
36
  @spaces.GPU
37
  def process_pdf(pdf_file):
38
+ # Open the PDF file
39
+ doc = fitz.open(pdf_file)
40
  all_cropped_images = []
41
 
42
  # Set the DPI for inference and high resolution for cropping
43
  low_dpi = 50
44
  high_dpi = 300
45
 
 
 
 
46
  # Calculate the scaling factor
47
  scale_factor = high_dpi / low_dpi
48
 
49
+ # Pre-cache all page pixmaps at low DPI
50
+ low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
51
+
52
+ # Loop through each page
53
+ for page_num, low_res_pix in enumerate(low_res_pixmaps):
54
+ low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
55
+
56
  # Get bounding boxes from low DPI image
57
  boxes = infer_image_and_get_boxes(low_res_img)
58
 
59
  if boxes:
60
+ # Load high DPI image for cropping only if boxes are found
61
+ high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
62
+ high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
63
 
64
  # Crop images at high DPI
65
  cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
66
  all_cropped_images.extend(cropped_imgs)
67
+
68
  return all_cropped_images
69
 
70
  # Create Gradio interface
 
79
  # Launch the app
80
  iface.launch()
81
 
82
+