zliang committed
Commit
f662962
•
1 Parent(s): ce0af4e

Rename app2.py to app.py

Files changed (1)
  1. app2.py → app.py +85 -85
app2.py → app.py RENAMED
@@ -1,85 +1,85 @@
- import gradio as gr
- from ultralytics import YOLO
- import fitz # PyMuPDF
- from PIL import Image
- import numpy as np
- import cv2
- import io
-
- # Load the trained YOLOv8 model
- model_path = 'runs\\detect\\train6\\weights\\best.pt' # Replace with the path to your trained .pt file
- model = YOLO(model_path)
-
- # Function to extract images from PDF
- def extract_images_from_pdf(pdf_path):
-     doc = fitz.open(pdf_path)
-     images = []
-
-     for page_num in range(len(doc)):
-         page = doc.load_page(page_num)
-         for img_num, img in enumerate(page.get_images(full=True)):
-             xref = img[0]
-             base_image = doc.extract_image(xref)
-             image_bytes = base_image["image"]
-             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-             images.append(image)
-     return images
-
- # Placeholder function to extract tables (modify as needed)
- def extract_tables_from_pdf(pdf_path):
-     # Dummy implementation; replace with actual table extraction logic
-     return ["Table extraction not implemented"]
-
- # Function to perform inference on an image
- def infer_image(image):
-     # Convert the image to RGB (if not already in that format)
-     image_rgb = np.array(image.convert('RGB'))
-
-     # Perform inference
-     results = model(image_rgb)
-
-     # Annotate image
-     annotated_image = np.array(image_rgb)
-     for result in results:
-         for box in result.boxes:
-             x1, y1, x2, y2 = box.xyxy[0]
-             cls = int(box.cls[0])
-             conf = float(box.conf[0])
-
-             # Draw bounding box
-             cv2.rectangle(annotated_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
-             # Draw label
-             label = f'{model.names[cls]} {conf:.2f}'
-             cv2.putText(annotated_image, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
-
-     return annotated_image
-
- # Gradio function to process PDF and return images and tables
- def process_pdf(pdf):
-     # Extract images and tables from PDF
-     images = extract_images_from_pdf(pdf.name)
-     tables = extract_tables_from_pdf(pdf.name)
-
-     # Perform inference on extracted images
-     annotated_images = [infer_image(img) for img in images]
-
-     # Convert annotated images back to Image format for Gradio
-     annotated_images_pil = [Image.fromarray(img) for img in annotated_images]
-
-     # Return annotated images and tables
-     return annotated_images_pil, tables
-
- # Create Gradio interface
- iface = gr.Interface(
-     fn=process_pdf,
-     inputs=gr.File(label="Upload a PDF"),
-     outputs=[
-         gr.Gallery(label="Annotated Images"),
-         gr.Textbox(label="Extracted Tables")
-     ],
-     title="PDF Image and Table Extraction with YOLOv8",
-     description="Upload a PDF to extract and annotate images and tables using YOLOv8."
- )
-
- # Launch the app
- iface.launch()

+ import gradio as gr
+ from ultralytics import YOLO
+ import fitz # PyMuPDF
+ from PIL import Image
+ import numpy as np
+ import cv2
+ import io
+
+ # Load the trained YOLOv8 model
+ model_path = 'best.pt' # Replace with the path to your trained .pt file
+ model = YOLO(model_path)
+
+ # Function to extract images from PDF
+ def extract_images_from_pdf(pdf_path):
+     doc = fitz.open(pdf_path)
+     images = []
+
+     for page_num in range(len(doc)):
+         page = doc.load_page(page_num)
+         for img_num, img in enumerate(page.get_images(full=True)):
+             xref = img[0]
+             base_image = doc.extract_image(xref)
+             image_bytes = base_image["image"]
+             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+             images.append(image)
+     return images
+
+ # Placeholder function to extract tables (modify as needed)
+ def extract_tables_from_pdf(pdf_path):
+     # Dummy implementation; replace with actual table extraction logic
+     return ["Table extraction not implemented"]
+
+ # Function to perform inference on an image
+ def infer_image(image):
+     # Convert the image to RGB (if not already in that format)
+     image_rgb = np.array(image.convert('RGB'))
+
+     # Perform inference
+     results = model(image_rgb)
+
+     # Annotate image
+     annotated_image = np.array(image_rgb)
+     for result in results:
+         for box in result.boxes:
+             x1, y1, x2, y2 = box.xyxy[0]
+             cls = int(box.cls[0])
+             conf = float(box.conf[0])
+
+             # Draw bounding box
+             cv2.rectangle(annotated_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
+             # Draw label
+             label = f'{model.names[cls]} {conf:.2f}'
+             cv2.putText(annotated_image, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+     return annotated_image
+
+ # Gradio function to process PDF and return images and tables
+ def process_pdf(pdf):
+     # Extract images and tables from PDF
+     images = extract_images_from_pdf(pdf.name)
+     tables = extract_tables_from_pdf(pdf.name)
+
+     # Perform inference on extracted images
+     annotated_images = [infer_image(img) for img in images]
+
+     # Convert annotated images back to Image format for Gradio
+     annotated_images_pil = [Image.fromarray(img) for img in annotated_images]
+
+     # Return annotated images and tables
+     return annotated_images_pil, tables
+
+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=process_pdf,
+     inputs=gr.File(label="Upload a PDF"),
+     outputs=[
+         gr.Gallery(label="Annotated Images"),
+         gr.Textbox(label="Extracted Tables")
+     ],
+     title="PDF Image and Table Extraction with YOLOv8",
+     description="Upload a PDF to extract and annotate images and tables using YOLOv8."
+ )
+
+ # Launch the app
+ iface.launch()
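
Note: the renamed app.py keeps extract_tables_from_pdf as a dummy that returns "Table extraction not implemented". Below is a minimal sketch of one way it could be filled in, assuming PyMuPDF 1.23 or newer (which adds Page.find_tables); it is an illustration only and not part of this commit.

import fitz  # PyMuPDF

def extract_tables_from_pdf(pdf_path):
    # Sketch only: assumes PyMuPDF >= 1.23 (Page.find_tables); not part of this commit
    doc = fitz.open(pdf_path)
    tables = []
    for page in doc:
        # find_tables() heuristically detects table layouts on the page
        for table in page.find_tables().tables:
            # extract() returns the table as a list of rows (lists of cell values)
            rows = table.extract()
            tables.append("\n".join(", ".join(str(cell) for cell in row) for row in rows))
    return tables or ["No tables found"]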