WeiChow
/

dptv2_b_vit

Model card Files Files and versions Community

dptv2_b_vit / README.md

WeiChow's picture

Update README.md

b5eb030 verified about 2 months ago

|

2.87 kB

	This model has the same architecture as [timm/vit_base_patch14_dinov2.lvd142m](https://huggingface.co/timm/vit_base_patch14_dinov2.lvd142m), so the weights can be loaded through `timm`.

	```shell
	# Clone the upstream Depth Anything V2 repository and work from inside it;
	# the conversion script imports `depth_anything_v2.dpt` from this checkout.
	git clone https://github.com/DepthAnything/Depth-Anything-V2
	cd Depth-Anything-V2
	```

	# translate (convert the Depth Anything V2 encoder weights to a timm checkpoint)

	```python
	'''
	Extract the ViT encoder weights from a Depth Anything V2 checkpoint and save
	them as "pytorch_model.bin".

	Download the checkpoint(s) first. The URLs are quoted because they contain
	"?", which some shells (e.g. zsh) treat as a glob character, and -O gives the
	file a clean name instead of "<name>.pth?download=true":

	wget -O depth_anything_v2_vits.pth "https://huggingface.co/depth-anything/Depth-Anything-V2-Small/resolve/main/depth_anything_v2_vits.pth?download=true"
	wget -O depth_anything_v2_vitb.pth "https://huggingface.co/depth-anything/Depth-Anything-V2-Base/resolve/main/depth_anything_v2_vitb.pth?download=true"
	wget -O depth_anything_v2_vitl.pth "https://huggingface.co/depth-anything/Depth-Anything-V2-Large/resolve/main/depth_anything_v2_vitl.pth?download=true"
	'''
	import torch

	from depth_anything_v2.dpt import DepthAnythingV2

	# NOTE: DEVICE is kept from the upstream example; the conversion below runs
	# entirely on CPU and does not use it.
	DEVICE = 'cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu'

	# Constructor arguments for each Depth Anything V2 variant.
	model_configs = {
	    'vits': {'encoder': 'vits', 'features': 64, 'out_channels': [48, 96, 192, 384]},
	    'vitb': {'encoder': 'vitb', 'features': 128, 'out_channels': [96, 192, 384, 768]},
	    'vitl': {'encoder': 'vitl', 'features': 256, 'out_channels': [256, 512, 1024, 1024]},
	}

	encoder = 'vitb'  # or 'vits', 'vitl'

	model = DepthAnythingV2(**model_configs[encoder])
	# SECURITY: torch.load unpickles the file, so only load checkpoints from a
	# trusted source (or pass weights_only=True on PyTorch versions that support it).
	model.load_state_dict(torch.load(f'depth_anything_v2_{encoder}.pth', map_location='cpu'))
	# `pretrained` holds the backbone whose weights are exported below
	# (presumably the DINOv2 ViT encoder -- confirm against depth_anything_v2/dpt.py).
	vit = model.pretrained

	# Optional sanity check: uncomment to list every encoder parameter and the total count.
	# total_params = 0
	# for name, param in vit.named_parameters():
	#     print(f"Parameter: {name} - Size: {param.size()} - Total Elements: {param.numel()}")
	#     total_params += param.numel()
	# print(f"Total number of parameters in ViT: {total_params}")

	# Drop every entry whose key contains 'mask_token' before saving
	# (presumably because the timm ViT has no such parameter -- TODO confirm).
	filtered_state_dict = {k: v for k, v in vit.state_dict().items() if 'mask_token' not in k}
	torch.save(filtered_state_dict, "pytorch_model.bin")
	```

	# usage

	```python
	from urllib.request import urlopen
	from PIL import Image
	import timm

	# Example image fetched over HTTP; any PIL image works here.
	img = Image.open(urlopen(
	    'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
	))

	# pretrained=False: the weights come from the local checkpoint. With
	# pretrained=True timm would first download and load the original DINOv2
	# weights from the hub, only for checkpoint_path to overwrite them.
	model = timm.create_model(
	    'vit_base_patch14_dinov2.lvd142m',
	    pretrained=False,
	    num_classes=0,  # remove classifier nn.Linear
	    checkpoint_path="pytorch_model.bin",
	)

	# Optional sanity check: uncomment to list every parameter of the loaded model.
	# for name, param in model.named_parameters():
	#     print(f"Parameter: {name} - Size: {param.size()} - Total Elements: {param.numel()}")
	model = model.eval()

	# get model specific transforms (normalization, resize)
	data_config = timm.data.resolve_model_data_config(model)
	transforms = timm.data.create_transform(**data_config, is_training=False)

	output = model(transforms(img).unsqueeze(0))  # output is (batch_size, num_features) shaped tensor

	# or equivalently (without needing to set num_classes=0)

	output = model.forward_features(transforms(img).unsqueeze(0))
	# output is unpooled: a (1, 1370, 768) shaped tensor for the default 518x518
	# input (37 * 37 patches + 1 class token, embedding dim 768 for ViT-B)

	output = model.forward_head(output, pre_logits=True)
	print(output)
	```


	Copyright saved.