Upload model (#2)

Browse files

- Upload model (8a44aab200322f75938b0a898aba31e6b29950ae)

Files changed (6) hide show

config.json +1 -1
enable_spectral_reparam.py +227 -0
eradio_model.py +3 -0
hf_model.py +11 -1
model.safetensors +2 -2
radio_model.py +15 -0

config.json CHANGED Viewed

@@ -354,7 +354,7 @@
     432
   ],
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.37.2",
   "version": "radio_v2.1",
   "vitdet_window_size": null
 }

     432
   ],
   "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.1",
   "version": "radio_v2.1",
   "vitdet_window_size": null
 }

enable_spectral_reparam.py ADDED Viewed

	@@ -0,0 +1,227 @@

+from logging import getLogger
+import math
+import os
+from typing import Union, Tuple
+from types import MethodType
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.nn.utils import parametrize
+from torch.nn.utils.parametrizations import _SpectralNorm
+from timm.models.vision_transformer import Attention, Mlp
+# Lower bound applied to (initial norm - alpha) when _SNReweight initializes
+# its version-2 scale, so that log(exp(t) - 1) is only evaluated for t >= _EPS.
+_EPS = 1e-5
+class _SNReweight(_SpectralNorm):
+    """Spectral-norm parametrization with a learnable output scale.
+
+    The parametrized weight is ``weight * scale / sigma`` where ``sigma`` is
+    the (estimated) spectral norm plus ``eps`` and ``scale`` is derived from
+    the learnable ``self.scale`` parameter:
+
+    * ``version == 1``: ``self.scale`` is used as-is.
+    * ``version == 2``: ``softplus(self.scale) + alpha`` is used, so the
+      effective scale is always greater than ``alpha``.
+
+    The version is also stored in the ``_sn_version`` buffer so that it is
+    written to, and can be recovered from, a state dict.
+    """
+    def __init__(self, weight: torch.Tensor, *args, init_norm_to_current: bool = False, alpha: float = 0.05, version: int = 2, **kwargs):
+        """Build the parametrization for ``weight``.
+
+        Args:
+            weight: The tensor that will be parametrized.
+            *args: Forwarded positionally to ``_SpectralNorm.__init__``.
+            init_norm_to_current: If True, initialize the effective scale to
+                the current value of ``_get_sigma(weight)`` so the
+                parametrized weight starts out (approximately) unchanged.
+                Otherwise the effective scale starts at 1.0.
+            alpha: Offset added to the softplus output in version 2.
+            version: Encoding of the learnable scale (1 or 2).
+            **kwargs: Forwarded to ``_SpectralNorm.__init__``.
+
+        Raises:
+            ValueError: If ``version`` is neither 1 nor 2.
+        """
+        super().__init__(weight, *args, **kwargs)
+        self.alpha = alpha
+        self.version = version
+        self.register_buffer('_sn_version', torch.tensor(version))
+        if init_norm_to_current:
+            # This will set the numerator to match the denominator, which should preserve the original values
+            init_scale = self._get_sigma(weight).item()
+        else:
+            init_scale = 1.0
+        if version == 1:
+            init_value = init_scale
+        elif version == 2:
+            # forward() computes softplus(scale) + alpha, so invert that
+            # mapping here: softplus(init_value) == init_scale - alpha.
+            t = init_scale - alpha
+            if t < _EPS:
+                # Logger.warn is a deprecated alias; use Logger.warning.
+                getLogger("spectral_reparam").warning(f'The initialized spectral norm {init_scale} is too small to be represented. Setting to {_EPS} instead.')
+                t = _EPS
+            init_value = math.log(math.exp(t) - 1)
+        else:
+            raise ValueError(f'Unsupported version: {version}')
+        # Make 2D so that weight decay gets applied
+        self.scale = nn.Parameter(torch.tensor([[init_value]], dtype=torch.float32, device=weight.device))
+    # Re-implementing this because we need to make division by sigma safe
+    def _get_sigma(self, weight: torch.Tensor) -> torch.Tensor:
+        """Return the spectral-norm estimate of ``weight`` plus ``self.eps``.
+
+        For 1-D input the exact vector norm is used. Otherwise the weight is
+        reshaped to a matrix, the power method is run when the module is in
+        training mode, and ``u^T W v`` is returned.
+        """
+        if weight.ndim == 1:
+            # Faster and more exact path, no need to approximate anything
+            sigma = weight.norm()
+        else:
+            weight_mat = self._reshape_weight_to_matrix(weight)
+            if self.training:
+                self._power_method(weight_mat, self.n_power_iterations)
+            # Work on clones of the u/v buffers rather than the buffers
+            # themselves (this mirrors the base-class implementation).
+            u = self._u.clone(memory_format=torch.contiguous_format)
+            v = self._v.clone(memory_format=torch.contiguous_format)
+            # The proper way of computing this should be through F.bilinear, but
+            # it seems to have some efficiency issues:
+            # https://github.com/pytorch/pytorch/issues/58093
+            sigma = torch.dot(u, torch.mv(weight_mat, v))
+        # Adding eps keeps the later division by sigma safe.
+        return sigma + self.eps
+    def forward(self, weight: torch.Tensor, *args, **kwargs):
+        """Return ``weight`` rescaled to the learned norm.
+
+        Raises:
+            ValueError: If ``self.version`` is neither 1 nor 2.
+        """
+        dtype = weight.dtype
+        sigma = self._get_sigma(weight, *args, **kwargs)
+        if self.version == 1:
+            scale = self.scale
+        elif self.version == 2:
+            scale = F.softplus(self.scale) + self.alpha
+        else:
+            raise ValueError(f'Unsupported version: {self.version}')
+        # The ratio is computed in float32 regardless of the weight dtype.
+        scale = scale.float() / sigma.float()
+        # NOTE(review): `scale` has shape (1, 1), so a 1-D weight would be
+        # broadcast to 2-D here; only 2-D weights are passed in this file.
+        y = weight * scale
+        # The float32 scale can promote the product; cast half-precision
+        # weights back to their original dtype.
+        if dtype in (torch.float16, torch.bfloat16):
+            y = y.to(dtype)
+        return y
+    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
+        """Load state, keeping ``self.version`` in sync with the checkpoint.
+
+        A state dict without ``_sn_version`` is treated as version 1, and the
+        key is inserted so the buffer loads without a missing-key error. When
+        the key is present, its value is adopted as ``self.version`` so that
+        ``forward`` decodes ``self.scale`` with the encoding it was saved in.
+        """
+        version_key = f'{prefix}_sn_version'
+        if version_key not in state_dict:
+            self.version = 1
+            state_dict[version_key] = torch.tensor(1)
+        else:
+            # Without this, a checkpoint saved with a different version than
+            # the constructor default would load its scale but still be
+            # interpreted with the constructor's version in forward().
+            self.version = int(state_dict[version_key])
+        return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs)
+class _AttnSNReweight(nn.Module):
+    """Apply ``_SNReweight`` separately to row-slices of a fused weight.
+
+    The weight is split along dim 0 into slices of ``shape[0] // 3`` rows.
+    The first two slices are always reweighted; the third is reweighted only
+    when ``renorm_values`` is True and otherwise passes through unchanged.
+    (Presumably the slices are the query, key and value projections of a
+    fused qkv weight -- confirm against the caller.)
+    """
+    def __init__(self, weight: torch.Tensor, *args, init_norm_to_current: bool = False, renorm_values: bool = False, **kwargs):
+        super().__init__()
+        num_reweighted = 3 if renorm_values else 2
+        slices = weight.split(weight.shape[0] // 3, dim=0)
+        handlers = []
+        for idx, chunk in enumerate(slices):
+            if idx < num_reweighted:
+                # Extra positional/keyword arguments go straight to _SNReweight.
+                handlers.append(_SNReweight(chunk, *args, init_norm_to_current=init_norm_to_current, **kwargs))
+            else:
+                handlers.append(nn.Identity())
+        self.parts = nn.ModuleList(handlers)
+    def forward(self, weight: torch.Tensor, *args, **kwargs):
+        """Reweight each slice with its handler and re-assemble along dim 0."""
+        slices = weight.split(weight.shape[0] // 3, dim=0)
+        outputs = []
+        for handler, chunk in zip(self.parts, slices):
+            outputs.append(handler(chunk))
+        return torch.cat(outputs, dim=0)
+def enable_spectral_reparam(model: nn.Module,
+                            n_power_iterations: int = 1,
+                            eps: float = 1e-6,
+                            init_norm_to_current: bool = False,
+                            renorm_values: bool = True,
+                            renorm_mlp: bool = True):
+    """Register spectral reparametrizations on attention and MLP weights.
+
+    Every ``Attention`` submodule gets an ``_AttnSNReweight`` on
+    ``qkv.weight``. When ``renorm_mlp`` is True, every ``Mlp`` submodule gets
+    an ``_SNReweight`` on ``fc1.weight`` and on ``fc2.weight``.
+
+    Args:
+        model: Module whose submodules are scanned.
+        n_power_iterations: Passed positionally to the parametrization.
+        eps: Passed to the parametrization as ``eps``.
+        init_norm_to_current: Passed to the parametrization unchanged.
+        renorm_values: Passed to ``_AttnSNReweight`` unchanged.
+        renorm_mlp: Whether ``Mlp`` submodules are reparametrized at all.
+    """
+    for mod in model.modules():
+        if isinstance(mod, Attention):
+            qkv_reparam = _AttnSNReweight(
+                mod.qkv.weight,
+                n_power_iterations,
+                dim=0,
+                eps=eps,
+                init_norm_to_current=init_norm_to_current,
+                renorm_values=renorm_values,
+            )
+            parametrize.register_parametrization(mod.qkv, 'weight', qkv_reparam)
+            continue
+        if not (isinstance(mod, Mlp) and renorm_mlp):
+            continue
+        # fc1 is handled before fc2, each parametrization being built right
+        # before it is registered.
+        for linear in (mod.fc1, mod.fc2):
+            linear_reparam = _SNReweight(
+                linear.weight,
+                n_power_iterations,
+                dim=0,
+                eps=eps,
+                init_norm_to_current=init_norm_to_current,
+            )
+            parametrize.register_parametrization(linear, 'weight', linear_reparam)
+def configure_spectral_reparam_from_args(model: nn.Module, args):
+    """Enable spectral reparametrization according to ``args.spectral_reparam``.
+
+    * Missing attribute or ``False``: nothing happens.
+    * ``True``: ``enable_spectral_reparam`` is called with its defaults.
+    * A dict: ``n_power_iterations`` (default 1) and ``eps`` (default 1e-12)
+      are read from it. Note that this ``eps`` fallback differs from the
+      1e-6 default of ``enable_spectral_reparam`` itself.
+
+    In both enabling cases ``init_norm_to_current`` is ``args.pretrained``.
+    Any other value of ``spectral_reparam`` is ignored.
+    """
+    setting = getattr(args, 'spectral_reparam', False)
+    if isinstance(setting, dict):
+        power_iters = setting.get('n_power_iterations', 1)
+        eps_value = setting.get('eps', 1e-12)
+        enable_spectral_reparam(
+            model,
+            n_power_iterations=power_iters,
+            eps=eps_value,
+            init_norm_to_current=args.pretrained,
+        )
+        return
+    if setting is True:
+        enable_spectral_reparam(model, init_norm_to_current=args.pretrained)
+def disable_spectral_reparam(model: nn.Module):
+    """Remove the ``weight`` parametrizations added by ``enable_spectral_reparam``.
+
+    Visits the ``qkv`` layer of every ``Attention`` submodule and the
+    ``fc1``/``fc2`` layers of every ``Mlp`` submodule. Layers that carry no
+    parametrization on ``weight`` are skipped, so this is safe to call on a
+    model enabled with ``renorm_mlp=False`` and safe to call more than once.
+    ``remove_parametrizations`` is called with its defaults.
+    """
+    for mod in model.modules():
+        if isinstance(mod, Attention):
+            layers = (mod.qkv,)
+        elif isinstance(mod, Mlp):
+            layers = (mod.fc1, mod.fc2)
+        else:
+            continue
+        for layer in layers:
+            # remove_parametrizations raises when the tensor is not
+            # parametrized, so check first instead of failing mid-traversal.
+            if parametrize.is_parametrized(layer, 'weight'):
+                parametrize.remove_parametrizations(layer, 'weight')
+# Script entry point: load a checkpoint, rebuild the model from the stored
+# args, strip the spectral reparametrization, and save a cleaned checkpoint.
+if __name__ == '__main__':
+    import argparse
+    # NOTE(review): a relative import under the __main__ guard only works when
+    # this file is run as a module inside its package -- confirm the intended
+    # invocation.
+    from . import radio_model as create_model
+    parser = argparse.ArgumentParser(description='Remove parametrization from state dict')
+    parser.add_argument('--checkpoint', type=str, required=True, help='The checkpoint to load')
+    parser.add_argument('--output', type=str, default='', help='Where to store the checkpoint')
+    parser.add_argument('--release', default=False, action='store_true', help='Prune extraneous checkpoint fields')
+    parser.add_argument('--strict', default=False, action='store_true', help='Strictly load the state dict')
+    args = parser.parse_args()
+    # Default output: same directory as the input, file name prefixed with "clean_".
+    if not args.output:
+        chk_dir, chk_name = os.path.split(args.checkpoint)
+        args.output = os.path.join(chk_dir, f'clean_{chk_name}')
+        print(f'Set output to "{args.output}"')
+    # NOTE(review): torch.load unpickles the file (the stored 'args' is an
+    # arbitrary object), so only run this on trusted checkpoints.
+    chk = torch.load(args.checkpoint, map_location='cpu', mmap=True)
+    model = create_model.create_model_from_args(chk['args'])
+    # Split the state dict: entries under 'base_model.' belong to the model
+    # (prefix stripped for loading); everything else is carried over untouched.
+    key = 'base_model.'
+    mod_state = dict()
+    extra_state = dict()
+    for k, v in chk['state_dict'].items():
+        if k.startswith(key):
+            mod_state[k[len(key):]] = v
+        else:
+            extra_state[k] = v
+    chk_load_info = model.load_state_dict(mod_state, strict=args.strict)
+    # Report any key mismatch from loading.
+    if chk_load_info.unexpected_keys or chk_load_info.missing_keys:
+        print(chk_load_info)
+    # Remove the parametrizations only after the weights have been loaded
+    # into the parametrized model.
+    if chk['args'].spectral_reparam:
+        disable_spectral_reparam(model)
+    # Cast to the dtype recorded in the checkpoint args, when there is one.
+    if hasattr(chk['args'], 'dtype'):
+        model.to(dtype=chk['args'].dtype)
+    # Re-assemble the state dict with the original prefix restored.
+    mod_state = model.state_dict()
+    final_state = dict()
+    final_state.update({f'{key}{k}': v for k, v in mod_state.items()})
+    final_state.update(extra_state)
+    chk['state_dict'] = final_state
+    # Record that the saved weights no longer use the reparametrization.
+    chk['args'].spectral_reparam = False
+    # --release keeps only these four top-level checkpoint fields.
+    if args.release:
+        chk = {
+            'arch': chk['arch'],
+            'epoch': chk['epoch'],
+            'state_dict': chk['state_dict'],
+            'args': chk['args'],
+        }
+    torch.save(chk, args.output)
+    pass

eradio_model.py CHANGED Viewed

@@ -1162,6 +1162,9 @@ class FasterViT(nn.Module):
         return {'rpb'}
     def forward_features(self, x):
         x = self.patch_embed(x)
         full_features = None
         for il, level in enumerate(self.levels):

         return {'rpb'}
     def forward_features(self, x):
+        _, _, H, W = x.shape
+        if H % 32 != 0 or W % 32 != 0:
+            raise ValueError(f"E-RADIO requires input dimensions to be divisible by 32 but got H x W: {H} x {W}")
         x = self.patch_embed(x)
         full_features = None
         for il, level in enumerate(self.levels):

hf_model.py CHANGED Viewed

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import namedtuple
-from typing import Optional, List, Union
 from timm.models import VisionTransformer
 import torch
@@ -20,6 +20,7 @@ from transformers import PretrainedConfig, PreTrainedModel
 from .common import RESOURCE_MAP, DEFAULT_VERSION
 # Force import of eradio_model in order to register it.
 from .eradio_model import eradio
 from .radio_model import create_model_from_args
@@ -122,5 +123,14 @@ class RADIOModel(PreTrainedModel):
     def input_conditioner(self) -> InputConditioner:
         return self.radio_model.input_conditioner
     def forward(self, x: torch.Tensor):
         return self.radio_model.forward(x)

 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections import namedtuple
+from typing import Callable, Optional, List, Union
 from timm.models import VisionTransformer
 import torch
 from .common import RESOURCE_MAP, DEFAULT_VERSION
 # Force import of eradio_model in order to register it.
 from .eradio_model import eradio
 from .radio_model import create_model_from_args
     def input_conditioner(self) -> InputConditioner:
         return self.radio_model.input_conditioner
+    @input_conditioner.setter
+    def input_conditioner(self, v: InputConditioner):
+        # Forward the assignment to the wrapped radio_model, which is where
+        # the getter reads the conditioner from.
+        self.radio_model.input_conditioner = v
+    def make_preprocessor_external(self) -> Callable[[torch.Tensor], torch.Tensor]:
+        """Detach the input conditioner and hand it to the caller.
+
+        The current ``input_conditioner`` is returned and replaced on the
+        model by ``nn.Identity()``; the caller then owns the returned
+        conditioner.
+        """
+        # NOTE(review): `nn` is not imported in the visible part of this
+        # hunk -- confirm it is in scope in hf_model.py.
+        detached = self.input_conditioner
+        self.input_conditioner = nn.Identity()
+        return detached
     def forward(self, x: torch.Tensor):
         return self.radio_model.forward(x)

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df75c4351ef558af885acbf0d21ad53fd273e3720b5ae3d1e7d4a23df1ca9ed1
-size 1306581088

 version https://git-lfs.github.com/spec/v1
+oid sha256:03534ca8b7a26b0cbf69073b944fdd47f41aedad1b3b01c1e387c27191abc8de
+size 1304018880

radio_model.py CHANGED Viewed

@@ -18,6 +18,7 @@ from .input_conditioner import InputConditioner
 from . import extra_timm_models
 from .adaptor_base import AdaptorBase, RadioOutput, AdaptorInput
 from . import eradio_model
 class Resolution(NamedTuple):
@@ -106,6 +107,12 @@ class RADIOModel(nn.Module):
             fn()
     def forward(self, x: torch.Tensor) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
         x = self.input_conditioner(x)
         y = self.model.forward_features(x)
@@ -180,6 +187,11 @@ def create_model_from_args(args) -> nn.Module:
         **args.model_kwargs,
     )
     assert (
         not args.cls_token_per_teacher or args.cpe_max_size is not None
     ), "CPE must be enabled for multiple CLS tokens!"
@@ -192,4 +204,7 @@ def create_model_from_args(args) -> nn.Module:
             register_multiple=args.register_multiple,
         )
     return model

 from . import extra_timm_models
 from .adaptor_base import AdaptorBase, RadioOutput, AdaptorInput
 from . import eradio_model
+from .enable_spectral_reparam import configure_spectral_reparam_from_args
 class Resolution(NamedTuple):
             fn()
     def forward(self, x: torch.Tensor) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+        res_step = self.min_resolution_step
+        if res_step is not None and (x.shape[-2] % res_step != 0 or x.shape[-1] % res_step != 0):
+            raise ValueError('The input resolution must be a multiple of `self.min_resolution_step`. '
+                             '`self.get_nearest_supported_resolution(<height>, <width>) is provided as a convenience API. '
+                             f'Input: {x.shape[-2:]}, Nearest: {self.get_nearest_supported_resolution(*x.shape[-2:])}')
         x = self.input_conditioner(x)
         y = self.model.forward_features(x)
         **args.model_kwargs,
     )
+    if hasattr(model, 'norm') and not getattr(args, 'model_norm', False):
+        model.norm = nn.Identity()
+    model.head = nn.Identity()
     assert (
         not args.cls_token_per_teacher or args.cpe_max_size is not None
     ), "CPE must be enabled for multiple CLS tokens!"
             register_multiple=args.register_multiple,
         )
+    if args.spectral_reparam:
+        configure_spectral_reparam_from_args(model, args)
     return model