Spaces:
Sleeping
Sleeping
from transformers import ViTMAEForPreTraining, ViTImageProcessor | |
import numpy as np | |
import torch | |
import gradio as gr | |
# Single checkpoint identifier shared by the image processor and the model,
# so the two can never drift apart.
CHECKPOINT = 'andrewbo29/vit-mae-base-formula1'

image_processor = ViTImageProcessor.from_pretrained(CHECKPOINT)
model = ViTMAEForPreTraining.from_pretrained(CHECKPOINT)

# Per-channel normalization statistics read from the processor's config.
# They are kept as NumPy arrays so prep_image() can undo the normalization.
# NOTE(review): the names say "imagenet"; the actual values are whatever the
# checkpoint's processor config declares -- confirm if that matters.
imagenet_mean = np.array(image_processor.image_mean)
imagenet_std = np.array(image_processor.image_std)
def prep_image(image):
    """Convert a normalized image tensor into a displayable 8-bit array.

    Undoes the processor normalization (``image * imagenet_std +
    imagenet_mean``), rescales to the 0-255 range, clips to that range and
    returns the result as a NumPy array.

    Args:
        image: Normalized torch tensor. It must broadcast against the
            per-channel statistics; callers in this file pass single
            channel-last images taken from ``nhwc`` tensors.

    Returns:
        ``numpy.ndarray`` of dtype ``uint8`` with values in [0, 255].
    """
    restored = (image * imagenet_std + imagenet_mean) * 255
    # Emit uint8 rather than a wider integer dtype: uint8 is the conventional
    # image dtype, whereas image consumers that follow scikit-image style
    # dtype conversion rescale wider integers by their full range, which
    # would squash 0-255 values towards black.
    # NOTE(review): Gradio's array-to-image path appears to do this -- confirm.
    return torch.clip(restored, 0, 255).to(torch.uint8).cpu().numpy()
def reconstruct(image):
    """Run the MAE model on one image and build the four demo views.

    The image is preprocessed, passed through the model, and the model's
    patch predictions and mask are turned back into channel-last images.

    Args:
        image: Input image as delivered by the Gradio image component
            (configured with ``type='pil'`` in this file). ``None`` when the
            user has not provided an image.

    Returns:
        A list of ``(array, caption)`` tuples, in order: the original image,
        the masked image, the raw reconstruction, and the reconstruction
        pasted together with the visible patches.

    Raises:
        gr.Error: If ``image`` is ``None``, so the UI shows a readable
            message instead of an opaque preprocessing failure.
    """
    if image is None:
        raise gr.Error('Please upload an image first.')

    pixel_values = image_processor.preprocess(image, return_tensors='pt').pixel_values

    # Pure inference: no gradients are needed, so skip building the autograd
    # graph for the forward pass.
    with torch.no_grad():
        outputs = model(pixel_values)
        y = model.unpatchify(outputs.logits)
    y = torch.einsum('nchw->nhwc', y).detach().cpu()

    # visualize the mask
    mask = outputs.mask.detach()
    mask = mask.unsqueeze(-1).repeat(1, 1, model.config.patch_size ** 2 * 3)  # (N, H*W, p*p*3)
    mask = model.unpatchify(mask)  # 1 is removing, 0 is keeping
    mask = torch.einsum('nchw->nhwc', mask).detach().cpu()

    x = torch.einsum('nchw->nhwc', pixel_values).detach().cpu()

    # masked image
    im_masked = x * (1 - mask)

    # MAE reconstruction pasted with visible patches
    im_paste = x * (1 - mask) + y * mask

    # Only the first (and, for this demo, only) item of the batch is shown.
    out_orig = prep_image(x[0])
    out_masked = prep_image(im_masked[0])
    out_rec = prep_image(y[0])
    out_rec_vis = prep_image(im_paste[0])

    return [(out_orig, 'original'),
            (out_masked, 'masked'),
            (out_rec, 'reconstruction'),
            (out_rec_vis, 'reconstruction + visible')]
# UI definition: an image input and a trigger button, plus a gallery that
# shows the (image, caption) pairs returned by reconstruct().
# NOTE(review): the original indentation was lost in this copy of the file;
# the nesting below (input + button in the inner column, gallery in the
# panel column, event wiring at Blocks level) is reconstructed -- confirm.
with gr.Blocks() as demo:
    with gr.Column(variant='panel'):
        with gr.Column():
            img = gr.Image(container=False, type='pil')
            btn = gr.Button('Apply F1 MAE', scale=0)
        gallery = gr.Gallery(columns=4, rows=1, height='300px', object_fit='none')
    # Clicking the button feeds the uploaded image to reconstruct() and
    # sends its result to the gallery.
    btn.click(fn=reconstruct, inputs=img, outputs=gallery)

if __name__ == "__main__":
    demo.launch()