zliang committed on
Commit
1b3f90f
1 Parent(s): 6762203

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -21
app.py CHANGED
@@ -1,22 +1,19 @@
1
  import gradio as gr
2
  import numpy as np
3
- import fitz # PyMuPDF
4
- import spaces
5
  from ultralytics import YOLOv10
6
 
7
  # Load the trained model
8
-
9
  model = YOLOv10("best.pt")
10
 
11
-
12
  # Define the class indices for figures and tables
13
  figure_class_index = 3 # class index for figures
14
  table_class_index = 4 # class index for tables
15
 
16
  # Function to perform inference on an image and return bounding boxes for figures and tables
17
-
18
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
19
- results = model.predict(image)
20
  boxes = [
21
  (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
22
  for result in results for box in result.boxes
@@ -25,46 +22,39 @@ def infer_image_and_get_boxes(image, confidence_threshold=0.6):
25
  return boxes
26
 
27
  # Function to crop images from the boxes
28
-
29
  def crop_images_from_boxes(image, boxes, scale_factor):
30
  cropped_images = [
31
- image[int(y1 * scale_factor):int(y2 * scale_factor), int(x1 * scale_factor):int(x2 * scale_factor)]
32
  for (x1, y1, x2, y2) in boxes
33
  ]
34
  return cropped_images
35
 
36
  @spaces.GPU
37
  def process_pdf(pdf_file):
38
- # Open the PDF file
39
- doc = fitz.open(pdf_file)
40
  all_cropped_images = []
41
 
42
  # Set the DPI for inference and high resolution for cropping
43
  low_dpi = 50
44
  high_dpi = 300
45
 
 
 
 
46
  # Calculate the scaling factor
47
  scale_factor = high_dpi / low_dpi
48
 
49
- # Pre-cache all page pixmaps at low DPI
50
- low_res_pixmaps = [page.get_pixmap(dpi=low_dpi) for page in doc]
51
-
52
- # Loop through each page
53
- for page_num, low_res_pix in enumerate(low_res_pixmaps):
54
- low_res_img = np.frombuffer(low_res_pix.samples, dtype=np.uint8).reshape(low_res_pix.height, low_res_pix.width, 3)
55
-
56
  # Get bounding boxes from low DPI image
57
  boxes = infer_image_and_get_boxes(low_res_img)
58
 
59
  if boxes:
60
- # Load high DPI image for cropping only if boxes are found
61
- high_res_pix = doc[page_num].get_pixmap(dpi=high_dpi)
62
- high_res_img = np.frombuffer(high_res_pix.samples, dtype=np.uint8).reshape(high_res_pix.height, high_res_pix.width, 3)
63
 
64
  # Crop images at high DPI
65
  cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
66
  all_cropped_images.extend(cropped_imgs)
67
-
68
  return all_cropped_images
69
 
70
  # Create Gradio interface
@@ -78,3 +68,4 @@ iface = gr.Interface(
78
 
79
  # Launch the app
80
  iface.launch()
 
 
1
  import gradio as gr
2
  import numpy as np
3
+ from pdf2image import convert_from_path
4
+ from PIL import Image
5
  from ultralytics import YOLOv10
6
 
7
  # Load the trained model
 
8
  model = YOLOv10("best.pt")
9
 
 
10
  # Define the class indices for figures and tables
11
  figure_class_index = 3 # class index for figures
12
  table_class_index = 4 # class index for tables
13
 
14
  # Function to perform inference on an image and return bounding boxes for figures and tables
 
15
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
16
+ results = model.predict(np.array(image))
17
  boxes = [
18
  (int(box.xyxy[0][0]), int(box.xyxy[0][1]), int(box.xyxy[0][2]), int(box.xyxy[0][3]))
19
  for result in results for box in result.boxes
 
22
  return boxes
23
 
24
  # Function to crop images from the boxes
 
25
  def crop_images_from_boxes(image, boxes, scale_factor):
26
  cropped_images = [
27
+ image.crop((int(x1 * scale_factor), int(y1 * scale_factor), int(x2 * scale_factor), int(y2 * scale_factor)))
28
  for (x1, y1, x2, y2) in boxes
29
  ]
30
  return cropped_images
31
 
32
  @spaces.GPU
33
  def process_pdf(pdf_file):
 
 
34
  all_cropped_images = []
35
 
36
  # Set the DPI for inference and high resolution for cropping
37
  low_dpi = 50
38
  high_dpi = 300
39
 
40
+ # Convert PDF pages to images at low DPI
41
+ low_res_images = convert_from_path(pdf_file.name, dpi=low_dpi)
42
+
43
  # Calculate the scaling factor
44
  scale_factor = high_dpi / low_dpi
45
 
46
+ for page_num, low_res_img in enumerate(low_res_images):
 
 
 
 
 
 
47
  # Get bounding boxes from low DPI image
48
  boxes = infer_image_and_get_boxes(low_res_img)
49
 
50
  if boxes:
51
+ # Convert the specific page to high DPI only if boxes are found
52
+ high_res_img = convert_from_path(pdf_file.name, dpi=high_dpi, first_page=page_num+1, last_page=page_num+1)[0]
 
53
 
54
  # Crop images at high DPI
55
  cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
56
  all_cropped_images.extend(cropped_imgs)
57
+
58
  return all_cropped_images
59
 
60
  # Create Gradio interface
 
68
 
69
  # Launch the app
70
  iface.launch()
71
+