from transformers import AutoFeatureExtractor, AutoModel
import torch
from einops import rearrange
from skops import hub_utils
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr

import os
import glob
import pickle


# Display names, HF embedder checkpoints, and Emb-GAM repos for each setup
setups = ['ResNet-50', 'ViT', 'DINO-ResNet-50', 'DINO-ViT']
embedder_names = ['microsoft/resnet-50', 'google/vit-base-patch16-224', 'Ramos-Ramos/dino-resnet-50', 'facebook/dino-vitb16']
gam_names = ['emb-gam-resnet', 'emb-gam-vit', 'emb-gam-dino-resnet', 'emb-gam-dino']

embedder_to_setup = dict(zip(embedder_names, setups))
gam_to_setup = dict(zip(gam_names, setups))

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load each backbone with its feature extractor, and record how to turn model
# outputs into a (num_patches, dim) matrix of patch embeddings
embedders = {}
for name in embedder_names:
    embedder = {}
    embedder['feature_extractor'] = AutoFeatureExtractor.from_pretrained(name)
    embedder['model'] = AutoModel.from_pretrained(name).eval().to(device)

    if 'resnet-50' in name:
        # ResNet-50 emits a (b, d, h, w) feature map; flatten it to (b, h*w, d).
        # At 224x224 input the final feature map is 7x7.
        embedder['num_patches_side'] = 7
        embedder['embedding_postprocess'] = lambda x: rearrange(x.last_hidden_state, 'b d h w -> b (h w) d')
    else:
        # ViT emits (b, 1 + num_patches, d); drop the [CLS] token
        embedder['num_patches_side'] = embedder['model'].config.image_size // embedder['model'].config.patch_size
        embedder['embedding_postprocess'] = lambda x: x.last_hidden_state[:, 1:]
    embedders[embedder_to_setup[name]] = embedder
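
# Sanity-check sketch (commented out; assumes Pillow is available): with 224x224
# inputs, ResNet-50 yields 7x7=49 patch embeddings of dim 2048, while
# (DINO) ViT-B/16 yields 14x14=196 of dim 768.
# from PIL import Image
# emb = embedders['DINO-ViT']
# feats = emb['feature_extractor'](Image.new('RGB', (224, 224)), return_tensors='pt').to(device)
# with torch.no_grad():
#     patches = emb['embedding_postprocess'](emb['model'](**feats))[0]
# print(patches.shape)  # expected: torch.Size([196, 768])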


# Download each Emb-GAM from the Hugging Face Hub (via skops) and unpickle it
gams = {}
for name in gam_names:
    if not os.path.exists(name):
        os.mkdir(name)
        hub_utils.download(repo_id=f'Ramos-Ramos/{name}', dst=name)

    with open(f'{name}/model.pkl', 'rb') as infile:
        gams[gam_to_setup[name]] = pickle.load(infile)
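
# Note: each pickled Emb-GAM exposes a scikit-learn-style linear interface over
# summed patch embeddings (`coef_` of shape (10, embedding dim), `intercept_` of
# shape (10,)); that interface is all the visualization below relies on.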


# The 10 Imagenette classes (a 10-class subset of ImageNet)
labels = [
    'tench',
    'English springer',
    'cassette player',
    'chain saw',
    'church',
    'French horn',
    'garbage truck',
    'gas pump',
    'golf ball',
    'parachute'
]


def visualize(input_img, visual_emb_gam_setups, show_scores, show_cbars):
    '''Visualizes the patch contributions to all labels of one or more visual
    Emb-GAMs'''

    # No setups selected: return an empty figure
    if not visual_emb_gam_setups:
        return plt.Figure()

    patch_contributions = {}

    # Compute per-patch, per-class contributions for each selected setup
    for setup in visual_emb_gam_setups:
        embedder_setup = embedders[setup]
        feature_extractor = embedder_setup['feature_extractor']
        embedding_postprocess = embedder_setup['embedding_postprocess']
        num_patches_side = embedder_setup['num_patches_side']
        gam = gams[setup]

        # Preprocess the image and extract its patch embeddings
        inputs = {
            k: v.to(device)
            for k, v in feature_extractor(input_img, return_tensors='pt').items()
        }
        with torch.no_grad():
            patch_embeddings = embedding_postprocess(
                embedder_setup['model'](**inputs)
            ).cpu()[0]

        # Score every patch against every class, spreading the intercept
        # evenly across the patches
        patch_contributions[setup] = (
            gam.coef_ @ patch_embeddings.T.numpy()
            + gam.intercept_.reshape(-1, 1) / (num_patches_side ** 2)
        ).reshape(-1, num_patches_side, num_patches_side)
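        # Why this works: the class-c logit is coef_[c] @ sum_p(emb_p) + intercept_[c],
        # which equals sum_p(coef_[c] @ emb_p + intercept_[c] / P) for the P patches,
        # so each term is one patch's additive contribution and every heatmap
        # below sums to its class score.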

    multiple_setups = len(visual_emb_gam_setups) > 1

    # One row per setup: the first column holds the input image (spanning all
    # rows) and the remaining 10 columns hold one heatmap per class
    fig, axs = plt.subplots(
        len(visual_emb_gam_setups),
        11,
        figsize=(20, round(10 / 4 * len(visual_emb_gam_setups)))
    )
    gs_ax = axs[0, 0] if multiple_setups else axs[0]
    gs = gs_ax.get_gridspec()
    ax_rm = axs[:, 0] if multiple_setups else [axs[0]]
    for ax in ax_rm:
        ax.remove()
    ax_orig_img = fig.add_subplot(gs[:, 0] if multiple_setups else gs[0])

    ax_orig_img.imshow(input_img)
    ax_orig_img.axis('off')

    # Draw one class heatmap per column, sharing a color scale within each setup
    axs_maps = axs[:, 1:] if multiple_setups else [axs[1:]]
    for i, setup in enumerate(visual_emb_gam_setups):
        vmin = patch_contributions[setup].min()
        vmax = patch_contributions[setup].max()
        for j in range(10):
            ax = axs_maps[i][j]
            sns.heatmap(
                patch_contributions[setup][j],
                ax=ax,
                square=True,
                vmin=vmin,
                vmax=vmax,
                cbar=show_cbars
            )
            if show_scores:
                ax.set_xlabel(f'{patch_contributions[setup][j].sum():.2f}')
            if j == 0:
                ax.set_ylabel(setup)
            if i == 0:
                ax.set_title(labels[j])
            ax.set_xticks([])
            ax.set_yticks([])

    plt.tight_layout()

    return fig
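
# Standalone usage sketch (bypassing Gradio; the example path below is hypothetical):
# from PIL import Image
# img = Image.open('examples/church.jpg').convert('RGB').resize((224, 224))
# fig = visualize(img, ['DINO-ViT'], show_scores=True, show_cbars=False)
# fig.savefig('contributions.png')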


description = 'Visualize the patch contributions of [visual Emb-GAMs](https://huggingface.co/models?other=visual%20emb-gam) to class labels.'

article = '''An extension of [Emb-GAMs](https://arxiv.org/abs/2209.11799), visual Emb-GAMs classify images by embedding them, taking intermediate representations corresponding to different spatial regions, summing these up, and predicting a class label from the sum using a GAM.

The use of a sum of embeddings allows us to visualize which regions of an image contributed positively or negatively to each class score.

No paper yet, but you can refer to these tweets:

- [Tweet #1](https://twitter.com/patrick_j_ramos/status/1586992857969147904?s=20&t=5-j5gKK0FpZOgzR_9Wdm1g)
- [Tweet #2](https://twitter.com/patrick_j_ramos/status/1602187142062804992?s=20&t=roTFXfMkHHYVoCuNyN-AUA)

Also, check out the original [Emb-GAM paper](https://arxiv.org/abs/2209.11799).

```bibtex
@article{singh2022emb,
  title={Emb-GAM: an Interpretable and Efficient Predictor using Pre-trained Language Models},
  author={Singh, Chandan and Gao, Jianfeng},
  journal={arXiv preprint arXiv:2209.11799},
  year={2022}
}
```
'''


demo = gr.Interface(
    fn=visualize,
    inputs=[
        gr.Image(shape=(224, 224), type='pil', label='Input image'),
        gr.CheckboxGroup(setups, value=setups, label='Visual Emb-GAM'),
        gr.Checkbox(label='Show scores'),
        gr.Checkbox(label='Show color bars')
    ],
    outputs=[
        gr.Plot(label='Patch contributions')
    ],
    examples=[[path, setups, False, False] for path in glob.glob('examples/*')],
    title='Visual Emb-GAM Probing',
    description=description,
    article=article,
    examples_per_page=20
)

demo.launch(debug=True)