---
tags:
- image-classification
license: mit
datasets:
- imagenet-1k
- imagenet-22k
---

See: [https://huggingface.co/timm/eva02_tiny_patch14_336.mim_in22k_ft_in1k](https://huggingface.co/timm/eva02_tiny_patch14_336.mim_in22k_ft_in1k)

```python
from urllib.request import urlopen

import einops
import numpy as np
import onnxruntime as ort
from PIL import Image


def softmax(x):
    """Return the softmax of a 1-D array of logits."""
    # Subtracting the max keeps np.exp from overflowing without changing the result.
    y = np.exp(x - np.max(x))
    return y / y.sum(axis=0)


IMG_URL = 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
IN1K_CLASSES_URL = 'https://storage.googleapis.com/bit_models/ilsvrc2012_wordnet_lemmas.txt'

session = ort.InferenceSession('eva02_tiny_patch14_336.mim_in22k_ft_in1k.ort')
# Alternatively, load the plain ONNX export:
# session = ort.InferenceSession('eva02_tiny_patch14_336.mim_in22k_ft_in1k.onnx')
model_input = session.get_inputs()[0]

# One class name per line; the line index is the class id.
with urlopen(IN1K_CLASSES_URL) as resp:
    labels = resp.read().decode().splitlines()

# The model input is laid out as (N, C, H, W), while PIL's resize expects
# (width, height), so the two spatial dims are swapped explicitly.
height, width = model_input.shape[2:]
with urlopen(IMG_URL) as resp:
    # Force 3 channels so palette/greyscale/RGBA images still broadcast
    # against the per-channel statistics below.
    img = np.array(Image.open(resp).convert('RGB').resize((width, height)))

# timm's pretrained config for EVA02 uses the OpenAI CLIP normalization
# statistics rather than the ImageNet defaults (check the checkpoint's
# `pretrained_cfg` if you swap in a different model).
mean = .48145466, .4578275, .40821073
sd = .26862954, .26130258, .27577711
img = (img / 255. - mean) / sd

# to clearly illustrate format ort expects
img = einops.rearrange(img, 'h w c -> 1 c h w').astype(np.float32)

out = session.run(None, {model_input.name: img})
# First output tensor, first (only) item of the batch.
out = softmax(out[0][0])

# Indices of the five highest probabilities, best first.
topk = np.argsort(out)[::-1][:5]
for i in topk:
    print(f'{out[i]:.2f}', labels[i])
```