Same architecture as [timm/vit_large_patch14_dinov2.lvd142m](https://huggingface.co/timm/vit_large_patch14_dinov2.lvd142m)

```shell
git clone https://github.com/microsoft/MoGe.git
cd MoGe
```

# translate

```python
import torch
from moge.model import MoGeModel

device = torch.device("cuda")
model = MoGeModel.from_pretrained("Ruicheng/moge-vitl").to(device)
# -------------------
backbone_state_dict = model.backbone.state_dict()
filtered_state_dict = {k: v for k, v in backbone_state_dict.items() if 'mask_token' not in k}
torch.save(filtered_state_dict, "pytorch_model.bin")
# -------------------
```

# usage

```python
from urllib.request import urlopen
from PIL import Image
import timm

img = Image.open(urlopen(
    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
))

model = timm.create_model(
    'hf_hub:WeiChow/moge_l_vit',
    pretrained=True,
    num_classes=0,  # remove classifier nn.Linear
)
# for name, param in model.named_parameters():
#     print(f"Parameter: {name} - Size: {param.size()} - Total Elements: {param.numel()}")
model = model.eval()

# get model specific transforms (normalization, resize)
data_config = timm.data.resolve_model_data_config(model)
transforms = timm.data.create_transform(**data_config, is_training=False)

output = model(transforms(img).unsqueeze(0))  # output is (batch_size, num_features) shaped tensor

# or equivalently (without needing to set num_classes=0)

output = model.forward_features(transforms(img).unsqueeze(0))
# output is unpooled, a (1, 1374, 1024) shaped tensor

output = model.forward_head(output, pre_logits=True)
print(output)
```

Copyright saved.