Spaces:

zliang
/

fastpaperlayout

Sleeping

App Files Files Community

zliang committed on May 21, 2024

Commit

ce0af4e

verified ·

1 Parent(s): 9e4f917

Upload app2.py

Browse files

Files changed (1) hide show

app2.py +85 -0

app2.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import gradio as gr
+from ultralytics import YOLO
+import fitz  # PyMuPDF
+from PIL import Image
+import numpy as np
+import cv2
+import io
+# Load the trained YOLOv8 model
+model_path = 'runs\\detect\\train6\\weights\\best.pt'  # Replace with the path to your trained .pt file
+model = YOLO(model_path)
+# Function to extract images from PDF
+def extract_images_from_pdf(pdf_path):
+    doc = fitz.open(pdf_path)
+    images = []
+    for page_num in range(len(doc)):
+        page = doc.load_page(page_num)
+        for img_num, img in enumerate(page.get_images(full=True)):
+            xref = img[0]
+            base_image = doc.extract_image(xref)
+            image_bytes = base_image["image"]
+            image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+            images.append(image)
+    return images
+# Placeholder function to extract tables (modify as needed)
+def extract_tables_from_pdf(pdf_path):
+    # Dummy implementation; replace with actual table extraction logic
+    return ["Table extraction not implemented"]
+# Function to perform inference on an image
+def infer_image(image):
+    # Convert the image to RGB (if not already in that format)
+    image_rgb = np.array(image.convert('RGB'))
+    # Perform inference
+    results = model(image_rgb)
+    # Annotate image
+    annotated_image = np.array(image_rgb)
+    for result in results:
+        for box in result.boxes:
+            x1, y1, x2, y2 = box.xyxy[0]
+            cls = int(box.cls[0])
+            conf = float(box.conf[0])
+            # Draw bounding box
+            cv2.rectangle(annotated_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
+            # Draw label
+            label = f'{model.names[cls]} {conf:.2f}'
+            cv2.putText(annotated_image, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+    return annotated_image
+# Gradio function to process PDF and return images and tables
+def process_pdf(pdf):
+    # Extract images and tables from PDF
+    images = extract_images_from_pdf(pdf.name)
+    tables = extract_tables_from_pdf(pdf.name)
+    # Perform inference on extracted images
+    annotated_images = [infer_image(img) for img in images]
+    # Convert annotated images back to Image format for Gradio
+    annotated_images_pil = [Image.fromarray(img) for img in annotated_images]
+    # Return annotated images and tables
+    return annotated_images_pil, tables
+# Create Gradio interface
+iface = gr.Interface(
+    fn=process_pdf,
+    inputs=gr.File( label="Upload a PDF"),
+    outputs=[
+        gr.Gallery(label="Annotated Images"),
+        gr.Textbox(label="Extracted Tables")
+    ],
+    title="PDF Image and Table Extraction with YOLOv8",
+    description="Upload a PDF to extract and annotate images and tables using YOLOv8."
+)
+# Launch the app
+iface.launch()