"""Gradio demo: document layout analysis with a trained YOLOv8 model."""

import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
#import spaces

# Load the trained model
model_path = 'best.pt'  # Replace with the path to your trained .pt file
model = YOLO(model_path)

# Per-class box colours. They are drawn onto the RGB image that Gradio
# displays, so each tuple is (R, G, B).
colors = {
    0: (255, 0, 0),    # Red for category 0
    1: (0, 255, 0),    # Green for category 1
    2: (0, 0, 255),    # Blue for category 2
    3: (255, 255, 0),  # Yellow for category 3
    4: (255, 0, 255),  # Magenta for category 4
}


def infer_image(image):
    """Run layout detection on an image and return an annotated copy.

    Args:
        image: RGB image as an H x W x 3 numpy array, as supplied by
            ``gr.Image(type="numpy")``, or ``None`` when nothing was uploaded.

    Returns:
        A new numpy array (same shape and channel order as ``image``) with a
        bounding box and a ``"<class name> <confidence>"`` label drawn for
        every detection, or ``None`` if ``image`` is ``None``.
    """
    # Gradio calls the function with None when the user submits without an
    # image; there is nothing to annotate in that case.
    if image is None:
        return None

    # Gradio delivers RGB, while Ultralytics expects BGR channel order for
    # numpy input, so swap the channels before inference.
    image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    # Perform inference
    results = model(image_bgr)

    # Draw on a writable, contiguous copy so the caller's array is untouched
    # (OpenCV drawing functions modify their target in place).
    annotated = image.copy()

    # Extract results and annotate image
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = (int(v) for v in box.xyxy[0].tolist())
            cls = int(box.cls[0])
            conf = float(box.conf[0])

            # Get the color for the current class
            color = colors.get(cls, (0, 255, 0))  # Default to green if class not found

            # Draw bounding box
            cv2.rectangle(annotated, (x1, y1), (x2, y2), color, 2)

            # Draw label above the box; if the box touches the top edge the
            # text would fall outside the image, so place it just inside.
            label = f'{model.names[cls]} {conf:.2f}'
            label_y = y1 - 10 if y1 - 10 > 10 else y1 + 15
            cv2.putText(annotated, label, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return annotated


# Create Gradio interface
iface = gr.Interface(
    fn=infer_image,
    inputs=gr.Image(type="numpy", label="Upload an Image"),
    outputs=gr.Image(type="numpy", label="Annotated Image"),
    title="Fast document layout analysis based on YOLOv8",
    description="Upload an image to get document layout analysis results."
)

# Launch the app
iface.launch()