File size: 2,650 Bytes
265ae36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
MODEL:
  WEIGHTS: ''
compute_precision:
  grad_scaler: true
  teacher:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
  student:
    backbone:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp16
        buffer_dtype: fp32
    dino_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
    ibot_head:
      sharding_strategy: SHARD_GRAD_OP
      mixed_precision:
        param_dtype: fp16
        reduce_dtype: fp32
        buffer_dtype: fp32
dino:
  loss_weight: 1.0
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
  koleo_loss_weight: 0.1
ibot:
  loss_weight: 1.0
  mask_sample_probability: 0.5
  mask_ratio_min_max:
  - 0.1
  - 0.5
  separate_head: false
  head_n_prototypes: 65536
  head_bottleneck_dim: 256
  head_nlayers: 3
  head_hidden_dim: 2048
train:
  batch_size_per_gpu: 64
  dataset_path: ImageNet:split=TRAIN
  output_dir: .
  saveckp_freq: 20
  seed: 0
  num_workers: 10
  OFFICIAL_EPOCH_LENGTH: 1250
  cache_dataset: true
  centering: "centering" # or "sinkhorn_knopp"
student:
  arch: vit_large
  patch_size: 16
  drop_path_rate: 0.3
  layerscale: 1.0e-05
  drop_path_uniform: true
  pretrained_weights: ''
  ffn_layer: "mlp"
  block_chunks: 0
  qkv_bias: true
  proj_bias: true
  ffn_bias: true
  num_register_tokens: 0
  interpolate_antialias: false
  interpolate_offset: 0.1
teacher:
  momentum_teacher: 0.992
  final_momentum_teacher: 1
  warmup_teacher_temp: 0.04
  teacher_temp: 0.07
  warmup_teacher_temp_epochs: 30
optim:
  epochs: 100
  weight_decay: 0.04
  weight_decay_end: 0.4
  base_lr: 0.004  # learning rate for a batch size of 1024
  lr: 0.  # will be set after applying scaling rule
  warmup_epochs: 10
  min_lr: 1.0e-06
  clip_grad: 3.0
  freeze_last_layer_epochs: 1
  scaling_rule: sqrt_wrt_1024
  patch_embed_lr_mult: 0.2
  layerwise_decay: 0.9
  adamw_beta1: 0.9
  adamw_beta2: 0.999
crops:
  global_crops_scale:
  - 0.32
  - 1.0
  local_crops_number: 8
  local_crops_scale:
  - 0.05
  - 0.32
  global_crops_size: 224
  local_crops_size: 96
evaluation:
  eval_period_iterations: 12500