ketanmore
/

ArabicDoc-layout-Detection

Model card Files Files and versions Community

ketanmore committed on Oct 30, 2024

Commit

dd31650

•

1 Parent(s): 2720487

Delete surya_yolo_pipeline.py

Browse files

Files changed (1) hide show

surya_yolo_pipeline.py +0 -169

surya_yolo_pipeline.py DELETED Viewed

@@ -1,169 +0,0 @@
-# Dependencies and model loading for the combined YOLO + surya layout pipeline.
-# Both models below are created as a side effect of importing this module.
-import cv2
-import supervision as sv # pip install supervision
-from ultralytics import YOLO
-import numpy as np
-import matplotlib.pyplot as plt
-# YOLO detector built from the weights file 'yolov10x_best.pt'.
-# NOTE(review): presumably resolved relative to the working directory — confirm.
-yolo_model = YOLO('yolov10x_best.pt')
-from surya.model.detection.segformer import load_processor , load_model
-import torch
-import os
-# NOTE(review): the next three imports repeat the three directly above.
-from surya.model.detection.segformer import load_processor , load_model
-import torch
-import os
-# os.environ['HF_HOME'] = '/share/data/drive_3/ketan/orc/HF_Cache'
-# Use the GPU when CUDA is available, otherwise fall back to the CPU.
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-# Load the "vikp/surya_layout2" model once and move it to `device`.
-model = load_model("vikp/surya_layout2").to(device)
-from PIL import Image
-from surya.input.processing import prepare_image_detection
-def predicted_mask_function(image_path) :
-    img = Image.open(image_path)
-    img = [prepare_image_detection(img=img, processor=load_processor())]
-    img = torch.stack(img, dim=0).to(model.dtype).to(model.device)
-    logits  = model(img).logits
-    predicted_mask = torch.argmax(logits[0], dim=0).cpu().numpy()
-    return predicted_mask
-def predict_boxes_labels(image_path):
-    results = yolo_model(source=image_path, conf=0.2, iou=0.8)[0]
-    detections = sv.Detections.from_ultralytics(results)
-    labels = detections.data["class_name"].tolist()
-    bboxes = detections.xyxy.tolist()
-    return bboxes,labels
-def resize_segment(mask, class_id, target_size, method=cv2.INTER_AREA):
-    # Create a binary mask for the current class
-    class_mask = np.where(mask == class_id, 1, 0).astype(np.uint8)
-    # Resize the class mask to the target size
-    resized_class_mask = cv2.resize(class_mask, (target_size[1], target_size[0]), interpolation=method)
-    return resized_class_mask
-def resize_and_combine_classes(mask, target_size, method=cv2.INTER_AREA):
-    unique_classes = np.unique(mask)
-    # Initialize a zero-filled mask for the combined result with the correct target size
-    resized_masks = np.zeros((target_size[0], target_size[1]), dtype=np.uint8)
-    # Process each class found in the mask
-    for class_id in unique_classes:
-        resized_class_mask = resize_segment(mask, class_id, target_size, method)
-        # Assign the class ID to the resized output mask where the resized class mask is 1
-        resized_masks[resized_class_mask == 1] = class_id
-    return resized_masks
-class_labels = {
-    0: 'Blank',
-    1: 'Caption',
-    2: 'Footnote',
-    3: 'Formula',
-    4: 'List-item',
-    5: 'Page-footer',
-    6: 'Page-header',
-    7: 'Picture',
-    8: 'Section-header',
-    9: 'Table',
-    10: 'Text',
-    11: 'Title'
-}
-colors = plt.cm.get_cmap('tab20', len(class_labels))
-def colormap_to_rgb(cmap, index):
-    color = cmap(index)[:3]  # Extract RGB, ignore alpha
-    return tuple(int(c * 255) for c in color)
-def mask_to_bboxes(colored_mask, class_labels):
-    bboxes = []
-    # Loop through each class in the class_labels
-    for label, class_name in class_labels.items():
-        # Get the RGB color for the current label
-        color = colormap_to_rgb(colors, label)
-        # Create a binary mask for the current label by checking where the colored mask matches the class color
-        class_mask = np.all(colored_mask == color, axis=-1).astype(np.uint8)
-        # Find contours of the class region in the binary mask
-        contours, _ = cv2.findContours(class_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        # Loop through all contours and extract bounding boxes
-        for contour in contours:
-            # Get the bounding box for the contour (in xywh format)
-            x, y, w, h = cv2.boundingRect(contour)
-            # Convert to xyxy format: (xmin, ymin, xmax, ymax)
-            xmin, ymin, xmax, ymax = x, y, x + w, y + h
-            # Append the bounding box with the corresponding class label
-            bboxes.append((xmin, ymin, xmax, ymax))
-            # bboxes.append((xmin, ymin, xmax, ymax, class_name))
-    return bboxes
-import matplotlib.pyplot as plt
-# from matplotlib import colors
-def suryolo(image_path) :
-    image = Image.open(image_path)
-    L, W = image.size
-    predicted_mask = predicted_mask_function(image_path)
-    colored_mask = np.zeros((W, L, 3), dtype=np.uint8)  # 3 channels for RGB
-    label_name_to_int = {v: k for k, v in class_labels.items()}
-    colors = plt.cm.get_cmap('tab20', len(class_labels))
-    bboxes,labels = predict_boxes_labels(image_path)
-    for box, label in zip(bboxes, labels):  # Assuming labels list corresponds to bboxes
-        xmin, ymin, xmax, ymax = box
-        xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
-        # Resize predicted mask to match the image dimensions (W = width, L = height)
-        predicted_mask = resize_and_combine_classes(predicted_mask, (W, L))
-        # Extract the mask region within the bounding box
-        mask_region = predicted_mask[ymin:ymax, xmin:xmax]
-        # Get the corresponding integer index for the label
-        label_index = label_name_to_int[label]
-        # Get the corresponding color for the label using the colormap
-        color = colormap_to_rgb(colors, label_index)
-        # Apply the color to the regions where mask_region > 0.5
-        colored_mask[ymin:ymax, xmin:xmax][mask_region > 0.5] = color
-    blank_color = colormap_to_rgb(colors, 0)
-    colored_mask[(colored_mask == 0).all(axis=-1)] = blank_color
-    return mask_to_bboxes(colored_mask,class_labels)