# Spaces: Sleeping (page-status text captured with the source; not part of the program)
import io

import cv2
import fitz  # PyMuPDF
import gradio as gr
import numpy as np
from PIL import Image
from ultralytics import YOLO
# Load the trained YOLOv8 model once at module level; infer_image() reuses it
# for every call instead of reloading the weights.
model_path = 'best.pt'  # Replace with the path to your trained .pt file
model = YOLO(model_path)
# Function to extract images from PDF
def extract_images_from_pdf(pdf_path):
    """Return the images embedded in a PDF as RGB PIL images.

    Args:
        pdf_path: Path of the document; passed straight to ``fitz.open``.

    Returns:
        A list of ``PIL.Image.Image`` objects converted to RGB, ordered by
        page and, within a page, by the order ``page.get_images`` reports.
        The list is empty when no page lists any image.
    """
    images = []
    # The context manager closes the document even if decoding an image
    # raises; the original left the handle open.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            for img in page.get_images(full=True):
                xref = img[0]  # first field of each entry is the image xref
                base_image = doc.extract_image(xref)
                image_bytes = base_image["image"]
                # convert() yields a new in-memory image, so nothing depends
                # on the document once it has been closed.
                image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
                images.append(image)
    return images
# Placeholder function to extract tables (modify as needed)
def extract_tables_from_pdf(pdf_path):
    """Return a placeholder result for table extraction.

    Args:
        pdf_path: Path of the PDF. Currently unused; kept so the signature is
            ready for a real implementation.

    Returns:
        A one-element list holding a "not implemented" notice.
    """
    # Dummy implementation; replace with actual table extraction logic
    return ["Table extraction not implemented"]
# Function to perform inference on an image
def infer_image(image):
    """Run the module-level YOLO model on one image and draw its detections.

    Args:
        image: A PIL image (any object exposing ``convert('RGB')``).

    Returns:
        An RGB ``numpy`` array: a copy of the input with a green rectangle
        and a ``"<class name> <confidence>"`` label for every detected box.
    """
    image_pil = image.convert('RGB')

    # Pass the PIL image, not an RGB ndarray: Ultralytics interprets raw
    # numpy input as BGR (OpenCV order), which would swap the red and blue
    # channels at inference time.
    results = model(image_pil)

    # np.array() makes a fresh writable copy for OpenCV to draw on.
    annotated_image = np.array(image_pil)
    green = (0, 255, 0)
    names = model.names

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
            cls = int(box.cls[0])
            conf = float(box.conf[0])

            # Draw bounding box
            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), green, 2)

            # Draw label; clamp the baseline so text for boxes touching the
            # top edge is not rendered above the image.
            label = f'{names[cls]} {conf:.2f}'
            label_y = max(y1 - 10, 15)
            cv2.putText(annotated_image, label, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, green, 2)

    return annotated_image
# Gradio function to process PDF and return images and tables
def process_pdf(pdf):
    """Extract images from an uploaded PDF, annotate them, and report tables.

    Args:
        pdf: The uploaded file. May be an object exposing the path as
            ``.name``, a plain path string, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(annotated_images, tables_text)`` tuple: a list of PIL images
        with detections drawn on them, and the table-extraction results
        joined into one newline-separated string for the text output.
    """
    # Submitting without a file passes None; answer with empty outputs
    # instead of failing on the attribute access below.
    if pdf is None:
        return [], "No PDF uploaded."

    # Accept both a file wrapper with ``.name`` and a bare path string.
    pdf_path = getattr(pdf, "name", pdf)

    # Extract images and tables from PDF
    images = extract_images_from_pdf(pdf_path)
    tables = extract_tables_from_pdf(pdf_path)

    # Run inference and convert each annotated array back to a PIL image
    annotated_images_pil = [Image.fromarray(infer_image(img)) for img in images]

    # The second output is a Textbox: join the entries so it shows readable
    # text rather than the repr of a Python list.
    tables_text = "\n".join(str(table) for table in tables)
    return annotated_images_pil, tables_text
# Create Gradio interface: one file upload in, an image gallery and a text box out
iface = gr.Interface(
    fn=process_pdf,
    # Restrict the picker to PDFs, matching the label and what process_pdf expects
    inputs=gr.File(label="Upload a PDF", file_types=[".pdf"]),
    outputs=[
        gr.Gallery(label="Annotated Images"),
        gr.Textbox(label="Extracted Tables"),
    ],
    title="PDF Image and Table Extraction with YOLOv8",
    description="Upload a PDF to extract and annotate images and tables using YOLOv8.",
)
# Launch the app
iface.launch()