MODEL:
  WEIGHTS: ''
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
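# The compute_precision block above sets up per-submodule FSDP.
# SHARD_GRAD_OP (roughly ZeRO stage 2) shards gradients and optimizer state
# across ranks while keeping parameters materialized between forward and
# backward; param_dtype is the compute dtype, reduce_dtype the dtype used
# for gradient reduction, buffer_dtype the dtype of module buffers. The
# student heads reduce gradients in fp32 for stability, while the teacher,
# updated by EMA rather than by gradients, can safely stay in fp16.
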
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
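# The dino block above is the image-level self-distillation loss. Its head
# is a 3-layer MLP (hidden dim 2048) projecting to a 256-d bottleneck and
# then onto 65536 prototypes. koleo_loss_weight adds the KoLeo regularizer,
# a differential-entropy term that spreads features apart within a batch.
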
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
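# The ibot block above is the patch-level masked-prediction loss. Each image
# is selected for masking with probability 0.5, and a masked image has a
# uniformly drawn fraction of its patches in [0.1, 0.5] masked out. With
# separate_head: false the iBOT loss shares the DINO head, so the head_*
# settings here should only take effect when separate_head is true.
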
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: .
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: "centering" # or "sinkhorn_knopp"
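# An "epoch" above is a fixed OFFICIAL_EPOCH_LENGTH of 1250 iterations rather
# than a full pass over the dataset, so optim.epochs: 100 amounts to
# 100 * 1250 = 125,000 training iterations. centering selects how teacher
# outputs are normalized before the loss: classic DINO centering or
# Sinkhorn-Knopp optimal-transport normalization.
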
student:
  arch: vit_large
  patch_size: 16
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: "mlp"
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
  num_register_tokens: 0
  interpolate_antialias: false
  interpolate_offset: 0.1
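# The student above is a ViT-L/16 backbone with stochastic depth
# (drop_path_rate: 0.3, applied uniformly across blocks rather than scaled
# with depth) and LayerScale initialized to 1e-5. num_register_tokens: 0
# disables register tokens; the interpolate_* keys control how positional
# embeddings are resampled for non-default image sizes.
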
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
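# The teacher above is an exponential moving average of the student, roughly
# t <- m * t + (1 - m) * s, with momentum m scheduled from 0.992 up to 1.0
# over the course of training. Its softmax temperature is warmed from 0.04
# to 0.07 during the first 30 epochs; the lower initial temperature gives
# sharper targets early in training.
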
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004 # learning rate for a batch size of 1024
  lr: 0. # will be set after applying scaling rule
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
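# The optimizer above is AdamW with cosine schedules; weight decay ramps from
# 0.04 to 0.4. lr: 0. is a placeholder: with scaling_rule sqrt_wrt_1024 the
# training script is expected to derive
#   lr = base_lr * sqrt(global_batch_size / 1024).
# For example, 16 GPUs * 64 images/GPU = 1024 gives lr = 0.004, while
# 8 GPUs * 64 = 512 gives 0.004 * sqrt(512 / 1024) ≈ 0.0028.
# patch_embed_lr_mult: 0.2 runs the patch embedding at one fifth of the base
# lr, layerwise_decay: 0.9 geometrically lowers the lr of earlier blocks, and
# freeze_last_layer_epochs: 1 freezes the head's last layer for the first
# epoch, a standard DINO stabilization trick.
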
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 224
  local_crops_size: 96
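# The crops block above is the usual multi-crop setup: two 224 px global
# crops covering 32-100% of the image area (the global crop count is fixed
# by the training code, not set here) and eight 96 px local crops covering
# 5-32%. Global crops feed both teacher and student; local crops feed only
# the student.
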
evaluation:
  eval_period_iterations: 12500
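# With an OFFICIAL_EPOCH_LENGTH of 1250, evaluating every 12,500 iterations
# corresponds to once every 10 "epochs".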