Spaces:
Runtime error
Runtime error
""" | |
Using as reference:
- https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512
- https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py
- https://huggingface.co/facebook/detr-resnet-50-panoptic
""" | |
from transformers import DetrFeatureExtractor, DetrForSegmentation | |
from PIL import Image | |
import gradio as gr | |
import numpy as np | |
import torch | |
import torchvision | |
# Colour table with one RGB entry per ADE class (150 classes), taken from
# https://huggingface.co/spaces/chansung/segformer-tf-transformers/blob/main/app.py
def ade_palette():
    """Return the ADE20K palette as a list of 150 ``[R, G, B]`` lists.

    The entry at index ``i`` is the colour for class ``i``. A fresh list of
    fresh lists is built on every call, so callers may mutate the result.
    """
    rgb_table = (
        (120, 120, 120), (180, 120, 120), (6, 230, 230), (80, 50, 50), (4, 200, 3),
        (120, 120, 80), (140, 140, 140), (204, 5, 255), (230, 230, 230), (4, 250, 7),
        (224, 5, 255), (235, 255, 7), (150, 5, 61), (120, 120, 70), (8, 255, 51),
        (255, 6, 82), (143, 255, 140), (204, 255, 4), (255, 51, 7), (204, 70, 3),
        (0, 102, 200), (61, 230, 250), (255, 6, 51), (11, 102, 255), (255, 7, 71),
        (255, 9, 224), (9, 7, 230), (220, 220, 220), (255, 9, 92), (112, 9, 255),
        (8, 255, 214), (7, 255, 224), (255, 184, 6), (10, 255, 71), (255, 41, 10),
        (7, 255, 255), (224, 255, 8), (102, 8, 255), (255, 61, 6), (255, 194, 7),
        (255, 122, 8), (0, 255, 20), (255, 8, 41), (255, 5, 153), (6, 51, 255),
        (235, 12, 255), (160, 150, 20), (0, 163, 255), (140, 140, 140), (250, 10, 15),
        (20, 255, 0), (31, 255, 0), (255, 31, 0), (255, 224, 0), (153, 255, 0),
        (0, 0, 255), (255, 71, 0), (0, 235, 255), (0, 173, 255), (31, 0, 255),
        (11, 200, 200), (255, 82, 0), (0, 255, 245), (0, 61, 255), (0, 255, 112),
        (0, 255, 133), (255, 0, 0), (255, 163, 0), (255, 102, 0), (194, 255, 0),
        (0, 143, 255), (51, 255, 0), (0, 82, 255), (0, 255, 41), (0, 255, 173),
        (10, 0, 255), (173, 255, 0), (0, 255, 153), (255, 92, 0), (255, 0, 255),
        (255, 0, 245), (255, 0, 102), (255, 173, 0), (255, 0, 20), (255, 184, 184),
        (0, 31, 255), (0, 255, 61), (0, 71, 255), (255, 0, 204), (0, 255, 194),
        (0, 255, 82), (0, 10, 255), (0, 112, 255), (51, 0, 255), (0, 194, 255),
        (0, 122, 255), (0, 255, 163), (255, 153, 0), (0, 255, 10), (255, 112, 0),
        (143, 255, 0), (82, 0, 255), (163, 255, 0), (255, 235, 0), (8, 184, 170),
        (133, 0, 255), (0, 255, 92), (184, 0, 255), (255, 0, 31), (0, 184, 255),
        (0, 214, 255), (255, 0, 112), (92, 255, 0), (0, 224, 255), (112, 224, 255),
        (70, 184, 160), (163, 0, 255), (153, 0, 255), (71, 255, 0), (255, 0, 163),
        (255, 204, 0), (255, 0, 143), (0, 255, 235), (133, 255, 0), (255, 0, 235),
        (245, 0, 255), (255, 0, 122), (255, 245, 0), (10, 190, 212), (214, 255, 0),
        (0, 204, 255), (20, 0, 255), (255, 255, 0), (0, 153, 255), (0, 41, 255),
        (0, 255, 204), (41, 0, 255), (41, 255, 0), (173, 0, 255), (0, 245, 255),
        (71, 0, 255), (122, 0, 255), (0, 255, 184), (0, 92, 255), (184, 255, 0),
        (0, 133, 255), (255, 214, 0), (25, 194, 194), (102, 255, 0), (92, 0, 255),
    )
    # Copy each immutable triple into its own list so the result is mutable.
    return [list(rgb) for rgb in rgb_table]
# DETR panoptic-segmentation checkpoint and its matching preprocessor, both
# loaded by name through `from_pretrained`.
feature_extractor = DetrFeatureExtractor.from_pretrained('facebook/detr-resnet-50-panoptic')
model = DetrForSegmentation.from_pretrained('facebook/detr-resnet-50-panoptic')

# gradio components
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# the gr.Interface call further down refers to `input` / `output`.
if hasattr(gr, "inputs") and hasattr(gr, "outputs"):
    # Legacy namespaces: identical to the original behaviour wherever it worked.
    input = gr.inputs.Image()
    output = gr.outputs.Image()
else:
    # Newer Gradio releases no longer ship gr.inputs / gr.outputs; the unified
    # gr.Image component is used for both the input and the output there.
    input = gr.Image()
    output = gr.Image()
def predict_animal_mask(im):
    """Blend a colour-coded DETR mask over a downsized copy of the input image.

    Args:
        im: Image as a NumPy array, as delivered by the Gradio image input
            (e.g. 480 x 640 x 3).

    Returns:
        A ``uint8`` RGB NumPy array: a 50/50 blend of the resized input and
        the colour mask, with the same height and width as the model's mask
        output (200 x 200 for the shapes recorded below).

    NOTE(review): the per-pixel index comes from an argmax over dimension 1 of
    ``pred_masks`` (size 100, the same size as dimension 1 of ``logits``), so
    it appears to identify a query slot rather than a semantic class; the
    palette colours are therefore not tied to categories - confirm whether
    class labels were intended.
    """
    # Force three channels so RGBA / greyscale arrays reach both the model and
    # the blend below as RGB.
    image = Image.fromarray(im).convert('RGB')
    # Shrink before inference; with this input size the masks were observed to
    # come back at 200 x 200, i.e. already aligned with the resized image.
    image = image.resize((200, 200))

    inputs = feature_extractor(images=image, return_tensors="pt")  # pt=PyTorch, tf=TensorFlow
    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    masks = outputs.pred_masks  # observed: torch.Size([1, 100, 200, 200])

    # For every pixel, index of the mask channel with the highest score.
    label_per_pixel = torch.argmax(masks[0], dim=0).numpy()

    # PIL sizes are (width, height) while arrays are (height, width); align
    # the image with the mask explicitly instead of relying on a square size.
    mask_height, mask_width = label_per_pixel.shape
    if image.size != (mask_width, mask_height):
        image = image.resize((mask_width, mask_height))

    # One vectorised palette lookup replaces a Python loop over all colours.
    # The modulo keeps the lookup in range should a model ever produce more
    # mask channels than there are palette entries.
    palette = np.asarray(ade_palette())
    color_mask = palette[label_per_pixel % len(palette)]

    # Show image + mask as a 50/50 blend.
    pred_img = np.asarray(image) * 0.5 + color_mask * 0.5
    return pred_img.astype(np.uint8)
####################################################
# Build the web UI around predict_animal_mask, then start serving it
demo = gr.Interface(
    fn=predict_animal_mask,
    inputs=input,
    outputs=output,
    title='Animals* segmentation in images',
    description="An animal* segmentation image webapp using DETR (End-to-End Object Detection) model with ResNet-50 backbone",
)
demo.launch()
####################################
# Manual example, kept for reference (not executed; needs `import requests`):
# url = "http://images.cocodataset.org/val2017/000000039769.jpg"
# image = Image.open(requests.get(url, stream=True).raw)
# inputs = feature_extractor(images=image, return_tensors="pt")
# outputs = model(**inputs)
# logits = outputs.logits # shape (batch_size, num_queries, num_classes + 1), e.g. [1, 100, 251]