zliang committed
Commit
f662962
•
1 Parent(s): ce0af4e

Rename app2.py to app.py

Files changed (1)
  1. app2.py → app.py +85 -85
app2.py → app.py RENAMED
@@ -1,85 +1,85 @@
- import gradio as gr
- from ultralytics import YOLO
- import fitz # PyMuPDF
- from PIL import Image
- import numpy as np
- import cv2
- import io
-
- # Load the trained YOLOv8 model
- model_path = 'runs\\detect\\train6\\weights\\best.pt' # Replace with the path to your trained .pt file
- model = YOLO(model_path)
-
- # Function to extract images from PDF
- def extract_images_from_pdf(pdf_path):
-     doc = fitz.open(pdf_path)
-     images = []
-
-     for page_num in range(len(doc)):
-         page = doc.load_page(page_num)
-         for img_num, img in enumerate(page.get_images(full=True)):
-             xref = img[0]
-             base_image = doc.extract_image(xref)
-             image_bytes = base_image["image"]
-             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
-             images.append(image)
-     return images
-
- # Placeholder function to extract tables (modify as needed)
- def extract_tables_from_pdf(pdf_path):
-     # Dummy implementation; replace with actual table extraction logic
-     return ["Table extraction not implemented"]
-
- # Function to perform inference on an image
- def infer_image(image):
-     # Convert the image to RGB (if not already in that format)
-     image_rgb = np.array(image.convert('RGB'))
-
-     # Perform inference
-     results = model(image_rgb)
-
-     # Annotate image
-     annotated_image = np.array(image_rgb)
-     for result in results:
-         for box in result.boxes:
-             x1, y1, x2, y2 = box.xyxy[0]
-             cls = int(box.cls[0])
-             conf = float(box.conf[0])
-
-             # Draw bounding box
-             cv2.rectangle(annotated_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
-             # Draw label
-             label = f'{model.names[cls]} {conf:.2f}'
-             cv2.putText(annotated_image, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
-
-     return annotated_image
-
- # Gradio function to process PDF and return images and tables
- def process_pdf(pdf):
-     # Extract images and tables from PDF
-     images = extract_images_from_pdf(pdf.name)
-     tables = extract_tables_from_pdf(pdf.name)
-
-     # Perform inference on extracted images
-     annotated_images = [infer_image(img) for img in images]
-
-     # Convert annotated images back to Image format for Gradio
-     annotated_images_pil = [Image.fromarray(img) for img in annotated_images]
-
-     # Return annotated images and tables
-     return annotated_images_pil, tables
-
- # Create Gradio interface
- iface = gr.Interface(
-     fn=process_pdf,
-     inputs=gr.File(label="Upload a PDF"),
-     outputs=[
-         gr.Gallery(label="Annotated Images"),
-         gr.Textbox(label="Extracted Tables")
-     ],
-     title="PDF Image and Table Extraction with YOLOv8",
-     description="Upload a PDF to extract and annotate images and tables using YOLOv8."
- )
-
- # Launch the app
- iface.launch()

+ import gradio as gr
+ from ultralytics import YOLO
+ import fitz # PyMuPDF
+ from PIL import Image
+ import numpy as np
+ import cv2
+ import io
+
+ # Load the trained YOLOv8 model
+ model_path = 'best.pt' # Replace with the path to your trained .pt file
+ model = YOLO(model_path)
+
+ # Function to extract images from PDF
+ def extract_images_from_pdf(pdf_path):
+     doc = fitz.open(pdf_path)
+     images = []
+
+     for page_num in range(len(doc)):
+         page = doc.load_page(page_num)
+         for img_num, img in enumerate(page.get_images(full=True)):
+             xref = img[0]
+             base_image = doc.extract_image(xref)
+             image_bytes = base_image["image"]
+             image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+             images.append(image)
+     return images
+
+ # Placeholder function to extract tables (modify as needed)
+ def extract_tables_from_pdf(pdf_path):
+     # Dummy implementation; replace with actual table extraction logic
+     return ["Table extraction not implemented"]
+
+ # Function to perform inference on an image
+ def infer_image(image):
+     # Convert the image to RGB (if not already in that format)
+     image_rgb = np.array(image.convert('RGB'))
+
+     # Perform inference
+     results = model(image_rgb)
+
+     # Annotate image
+     annotated_image = np.array(image_rgb)
+     for result in results:
+         for box in result.boxes:
+             x1, y1, x2, y2 = box.xyxy[0]
+             cls = int(box.cls[0])
+             conf = float(box.conf[0])
+
+             # Draw bounding box
+             cv2.rectangle(annotated_image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
+             # Draw label
+             label = f'{model.names[cls]} {conf:.2f}'
+             cv2.putText(annotated_image, label, (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+
+     return annotated_image
+
+ # Gradio function to process PDF and return images and tables
+ def process_pdf(pdf):
+     # Extract images and tables from PDF
+     images = extract_images_from_pdf(pdf.name)
+     tables = extract_tables_from_pdf(pdf.name)
+
+     # Perform inference on extracted images
+     annotated_images = [infer_image(img) for img in images]
+
+     # Convert annotated images back to Image format for Gradio
+     annotated_images_pil = [Image.fromarray(img) for img in annotated_images]
+
+     # Return annotated images and tables
+     return annotated_images_pil, tables
+
+ # Create Gradio interface
+ iface = gr.Interface(
+     fn=process_pdf,
+     inputs=gr.File(label="Upload a PDF"),
+     outputs=[
+         gr.Gallery(label="Annotated Images"),
+         gr.Textbox(label="Extracted Tables")
+     ],
+     title="PDF Image and Table Extraction with YOLOv8",
+     description="Upload a PDF to extract and annotate images and tables using YOLOv8."
+ )
+
+ # Launch the app
+ iface.launch()
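
Note: the renamed app.py keeps extract_tables_from_pdf as a dummy that returns "Table extraction not implemented". Below is a minimal sketch of one way it could be filled in, assuming PyMuPDF 1.23 or newer (which adds Page.find_tables); it is an illustration only and not part of this commit.

import fitz  # PyMuPDF

def extract_tables_from_pdf(pdf_path):
    # Sketch only: assumes PyMuPDF >= 1.23 (Page.find_tables); not part of this commit
    doc = fitz.open(pdf_path)
    tables = []
    for page in doc:
        # find_tables() heuristically detects table layouts on the page
        for table in page.find_tables().tables:
            # extract() returns the table as a list of rows (lists of cell values)
            rows = table.extract()
            tables.append("\n".join(", ".join(str(cell) for cell in row) for row in rows))
    return tables or ["No tables found"]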