import cv2
import torch
import numpy as np
from torch import nn
from transformers import AutoImageProcessor, Swinv2ForImageClassification, SegformerForSemanticSegmentation

from lib.cam import ClassActivationMap
from lib.utils import add_mask, simple_vcdr


class GlaucomaModel:
    """Glaucoma screening pipeline built from two pretrained models.

    Bundles an image-classification model, a semantic-segmentation model
    and a class-activation-map helper behind a single ``process`` call.
    """

    def __init__(self,
                 cls_model_path="pamixsun/swinv2_tiny_for_glaucoma_classification",
                 seg_model_path='pamixsun/segformer_for_optic_disc_cup_segmentation',
                 device=torch.device('cpu')):
        """Load both models and their image processors.

        Args:
            cls_model_path: name or path passed to ``from_pretrained`` for
                the classification model and its image processor.
            seg_model_path: name or path passed to ``from_pretrained`` for
                the segmentation model and its image processor.
            device: torch device both models are moved to.
        """
        # where to load the model, gpu or cpu ?
        self.device = device

        # classification model for glaucoma (kept in eval mode)
        self.cls_extractor = AutoImageProcessor.from_pretrained(cls_model_path)
        self.cls_model = Swinv2ForImageClassification.from_pretrained(cls_model_path).to(device).eval()

        # segmentation model for optic disc and cup (kept in eval mode)
        self.seg_extractor = AutoImageProcessor.from_pretrained(seg_model_path)
        self.seg_model = SegformerForSemanticSegmentation.from_pretrained(seg_model_path).to(device).eval()

        # class activation map, driven by the classification model
        self.cam = ClassActivationMap(self.cls_model, self.cls_extractor)

        # classification id to label
        self.cls_id2label = self.cls_model.config.id2label
        # segmentation id to label
        self.seg_id2label = self.seg_model.config.id2label

        # number of classes for classification
        self.num_diseases = len(self.cls_id2label)
        # number of classes for segmentation
        self.seg_classes = len(self.seg_id2label)

    def glaucoma_pred(self, image: np.ndarray):
        """Classify a single image.

        Args:
            image: image array in RGB order.

        Returns:
            NumPy integer scalar: index of the highest-scoring class for
            the (single) image in the batch.
        """
        # The processor receives a copy so the caller's array is never touched.
        inputs = self.cls_extractor(images=image.copy(), return_tensors="pt")
        # Rebind explicitly instead of relying on ``.to`` mutating in place.
        inputs = inputs.to(self.device)

        with torch.no_grad():
            logits = self.cls_model(**inputs).logits

        # No ``detach`` needed: tensors produced under ``no_grad`` carry no graph.
        return logits.cpu()[0, :].numpy().argmax()

    def optic_disc_cup_pred(self, image: np.ndarray) -> np.ndarray:
        """Segment a single image.

        Args:
            image: image array in RGB order.

        Returns:
            ``uint8`` array of per-pixel class ids with the same height and
            width as ``image`` (``image.shape[:2]``).
        """
        inputs = self.seg_extractor(images=image.copy(), return_tensors="pt")
        # Rebind explicitly instead of relying on ``.to`` mutating in place.
        inputs = inputs.to(self.device)

        with torch.no_grad():
            outputs = self.seg_model(**inputs)
        logits = outputs.logits.cpu()

        # The logits are resized to the input resolution before the argmax,
        # so the returned mask lines up with ``image`` pixel for pixel.
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=image.shape[:2],
            mode="bilinear",
            align_corners=False,
        )

        # argmax over the class dimension, then take the only batch item
        pred_disc_cup = upsampled_logits.argmax(dim=1)[0]
        return pred_disc_cup.numpy().astype(np.uint8)

    def process(self, image: np.ndarray):
        """Run classification, CAM and segmentation on one image.

        Args:
            image: image array in RGB order.

        Returns:
            Tuple ``(disease_idx, disc_cup_image, cam, vcdr)``:
            the predicted class index, the second value returned by
            ``add_mask``, the class activation map resized to the image's
            width and height, and the value returned by ``simple_vcdr``.
        """
        image_shape = image.shape[:2]

        disease_idx = self.glaucoma_pred(image)

        cam = self.cam.get_cam(image, disease_idx)
        # image_shape is (height, width); cv2.resize expects (width, height).
        cam = cv2.resize(cam, image_shape[::-1])

        disc_cup = self.optic_disc_cup_pred(image)
        # NOTE(review): presumably the vertical cup-to-disc ratio — confirm
        # against lib.utils.simple_vcdr.
        vcdr = simple_vcdr(disc_cup)

        # NOTE(review): label ids 0/1/2 are paired with black/green/red and
        # 0.2 looks like a blend weight — confirm against lib.utils.add_mask
        # and self.seg_id2label.
        _, disc_cup_image = add_mask(image, disc_cup, [0, 1, 2], [[0, 0, 0], [0, 255, 0], [255, 0, 0]], 0.2)

        return disease_idx, disc_cup_image, cam, vcdr