Spaces:
Running
Running
File size: 6,582 Bytes
aea73e2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# --------------------------------------------------------
# UniRepLKNet
# https://github.com/AILab-CVC/UniRepLKNet
# Licensed under The Apache 2.0 License [see LICENSE for details]
# --------------------------------------------------------
import json
from mmcv.runner import OPTIMIZER_BUILDERS, DefaultOptimizerConstructor
from mmcv.runner import get_dist_info
from mmdet.utils import get_root_logger
def get_layer_id(var_name, max_layer_id,):
"""Get the layer id to set the different learning rates in ``layer_wise``
decay_type.
Args:
var_name (str): The key of the model.
max_layer_id (int): Maximum layer id.
Returns:
int: The id number corresponding to different learning rate in
``LearningRateDecayOptimizerConstructor``.
"""
if var_name in ('backbone.cls_token', 'backbone.mask_token',
'backbone.pos_embed'):
return 0
elif var_name.startswith('backbone.downsample_layers'):
stage_id = int(var_name.split('.')[2])
if stage_id == 0:
layer_id = 0
elif stage_id == 1:
layer_id = 2
elif stage_id == 2:
layer_id = 3
elif stage_id == 3:
layer_id = max_layer_id
return layer_id
elif var_name.startswith('backbone.stages'):
stage_id = int(var_name.split('.')[2])
block_id = int(var_name.split('.')[3])
if stage_id == 0:
layer_id = 1
elif stage_id == 1:
layer_id = 2
elif stage_id == 2:
layer_id = 3 + block_id // 3
elif stage_id == 3:
layer_id = max_layer_id
return layer_id
else:
return max_layer_id + 1
def get_stage_id(var_name, max_stage_id):
"""Get the stage id to set the different learning rates in ``stage_wise``
decay_type.
Args:
var_name (str): The key of the model.
max_stage_id (int): Maximum stage id.
Returns:
int: The id number corresponding to different learning rate in
``LearningRateDecayOptimizerConstructor``.
"""
if var_name in ('backbone.cls_token', 'backbone.mask_token',
'backbone.pos_embed'):
return 0
elif var_name.startswith('backbone.downsample_layers'):
return 0
elif var_name.startswith('backbone.stages'):
stage_id = int(var_name.split('.')[2])
return stage_id + 1
else:
return max_stage_id - 1
@OPTIMIZER_BUILDERS.register_module()
class UniRepLKNetLearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor):
# Different learning rates are set for different layers of backbone.
# The design is inspired by and adapted from ConvNeXt.
def add_params(self, params, module, **kwargs):
"""Add all parameters of module to the params list.
The parameters of the given module will be added to the list of param
groups, with specific rules defined by paramwise_cfg.
Args:
params (list[dict]): A list of param groups, it will be modified
in place.
module (nn.Module): The module to be added.
"""
logger = get_root_logger()
parameter_groups = {}
logger.info(f'self.paramwise_cfg is {self.paramwise_cfg}')
num_layers = self.paramwise_cfg.get('num_layers') + 2
decay_rate = self.paramwise_cfg.get('decay_rate')
decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise')
dw_scale = self.paramwise_cfg.get('dw_scale', 1)
logger.info('Build UniRepLKNetLearningRateDecayOptimizerConstructor '
f'{decay_type} {decay_rate} - {num_layers}')
weight_decay = self.base_wd
for name, param in module.named_parameters():
if not param.requires_grad:
continue # frozen weights
if len(param.shape) == 1 or name.endswith('.bias') or name in (
'pos_embed', 'cls_token'):
group_name = 'no_decay'
this_weight_decay = 0.
else:
group_name = 'decay'
this_weight_decay = weight_decay
if 'layer_wise' in decay_type:
layer_id = get_layer_id(name, self.paramwise_cfg.get('num_layers'))
logger.info(f'set param {name} as id {layer_id}')
elif decay_type == 'stage_wise':
layer_id = get_stage_id(name, num_layers)
logger.info(f'set param {name} as id {layer_id}')
if dw_scale == 1 or 'dwconv' not in name:
group_name = f'layer_{layer_id}_{group_name}'
if group_name not in parameter_groups:
scale = decay_rate ** (num_layers - layer_id - 1)
parameter_groups[group_name] = {
'weight_decay': this_weight_decay,
'params': [],
'param_names': [],
'lr_scale': scale,
'group_name': group_name,
'lr': scale * self.base_lr,
}
parameter_groups[group_name]['params'].append(param)
parameter_groups[group_name]['param_names'].append(name)
else:
group_name = f'layer_{layer_id}_{group_name}_dwconv'
if group_name not in parameter_groups:
scale = decay_rate ** (num_layers - layer_id - 1) * dw_scale
parameter_groups[group_name] = {
'weight_decay': this_weight_decay,
'params': [],
'param_names': [],
'lr_scale': scale,
'group_name': group_name,
'lr': scale * self.base_lr,
}
parameter_groups[group_name]['params'].append(param)
parameter_groups[group_name]['param_names'].append(name)
rank, _ = get_dist_info()
if rank == 0:
to_display = {}
for key in parameter_groups:
to_display[key] = {
'param_names': parameter_groups[key]['param_names'],
'lr_scale': parameter_groups[key]['lr_scale'],
'lr': parameter_groups[key]['lr'],
'weight_decay': parameter_groups[key]['weight_decay'],
}
logger.info(f'Param groups = {json.dumps(to_display, indent=2)}')
params.extend(parameter_groups.values())
|