zliang committed on
Commit
ec2e6e8
1 Parent(s): 3cadd69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -17
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # Load the trained model
2
  import gradio as gr
3
  from ultralytics import YOLO
4
  import cv2
@@ -6,6 +5,7 @@ import numpy as np
6
  import fitz # PyMuPDF
7
  from PIL import Image
8
  import spaces
 
9
  # Load the trained model
10
  model_path = 'best.pt' # Replace with the path to your trained .pt file
11
  model = YOLO(model_path)
@@ -16,11 +16,8 @@ table_class_index = 4 # class index for tables
16
 
17
  # Function to perform inference on an image and return bounding boxes for figures and tables
18
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
19
- # Convert the image from BGR to RGB
20
- image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
21
-
22
  # Perform inference
23
- results = model(image_rgb)
24
 
25
  boxes = []
26
  # Extract results
@@ -55,7 +52,7 @@ def process_pdf(pdf_file):
55
 
56
  # Calculate the scaling factor
57
  scale_factor = high_dpi / low_dpi
58
-
59
  # Loop through each page
60
  for page_num in range(len(doc)):
61
  page = doc.load_page(page_num)
@@ -64,18 +61,19 @@ def process_pdf(pdf_file):
64
  low_res_pix = page.get_pixmap(dpi=low_dpi)
65
  low_res_img = Image.frombytes("RGB", [low_res_pix.width, low_res_pix.height], low_res_pix.samples)
66
  low_res_img = np.array(low_res_img)
67
-
68
  # Get bounding boxes from low DPI image
69
  boxes = infer_image_and_get_boxes(low_res_img)
70
-
71
- # Load high DPI image for cropping
72
- high_res_pix = page.get_pixmap(dpi=high_dpi)
73
- high_res_img = Image.frombytes("RGB", [high_res_pix.width, high_res_pix.height], high_res_pix.samples)
74
- high_res_img = np.array(high_res_img)
75
-
76
- # Crop images at high DPI
77
- cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
78
- all_cropped_images.extend(cropped_imgs)
 
79
 
80
  return all_cropped_images
81
 
@@ -90,4 +88,3 @@ iface = gr.Interface(
90
 
91
  # Launch the app
92
  iface.launch()
93
-
 
 
1
  import gradio as gr
2
  from ultralytics import YOLO
3
  import cv2
 
5
  import fitz # PyMuPDF
6
  from PIL import Image
7
  import spaces
8
+
9
  # Load the trained model
10
  model_path = 'best.pt' # Replace with the path to your trained .pt file
11
  model = YOLO(model_path)
 
16
 
17
  # Function to perform inference on an image and return bounding boxes for figures and tables
18
  def infer_image_and_get_boxes(image, confidence_threshold=0.6):
 
 
 
19
  # Perform inference
20
+ results = model(image)
21
 
22
  boxes = []
23
  # Extract results
 
52
 
53
  # Calculate the scaling factor
54
  scale_factor = high_dpi / low_dpi
55
+
56
  # Loop through each page
57
  for page_num in range(len(doc)):
58
  page = doc.load_page(page_num)
 
61
  low_res_pix = page.get_pixmap(dpi=low_dpi)
62
  low_res_img = Image.frombytes("RGB", [low_res_pix.width, low_res_pix.height], low_res_pix.samples)
63
  low_res_img = np.array(low_res_img)
64
+
65
  # Get bounding boxes from low DPI image
66
  boxes = infer_image_and_get_boxes(low_res_img)
67
+
68
+ if boxes:
69
+ # Load high DPI image for cropping only if boxes are found
70
+ high_res_pix = page.get_pixmap(dpi=high_dpi)
71
+ high_res_img = Image.frombytes("RGB", [high_res_pix.width, high_res_pix.height], high_res_pix.samples)
72
+ high_res_img = np.array(high_res_img)
73
+
74
+ # Crop images at high DPI
75
+ cropped_imgs = crop_images_from_boxes(high_res_img, boxes, scale_factor)
76
+ all_cropped_images.extend(cropped_imgs)
77
 
78
  return all_cropped_images
79
 
 
88
 
89
  # Launch the app
90
  iface.launch()