MalloryWittwerEPFL committed
Commit d514464
1 Parent(s): ea195e7
Upload model
Browse files
- config.py +28 -0
- configuration_cetacean_classifier.py +12 -0
- metric_learning.py +59 -0
- model.safetensors +1 -1
- modeling_cetacean_classifier.py +92 -0
- train.py +150 -0
- utils.py +41 -0
config.py
ADDED
@@ -0,0 +1,28 @@
from typing import Optional

import yaml


class Config(dict):
    def __getattr__(self, key):
        try:
            val = self[key]
        except KeyError:
            return super().__getattr__(key)
        if isinstance(val, dict):
            return Config(val)
        return val


def load_config(path: str, default_path: Optional[str]) -> Config:
    with open(path) as f:
        cfg = Config(yaml.full_load(f))
    if default_path is not None:
        # set keys not included in `path` by default
        with open(default_path) as f:
            default_cfg = Config(yaml.full_load(f))
        for key, val in default_cfg.items():
            if key not in cfg:
                print(f"used default config {key}: {val}")
                cfg[key] = val
    return cfg
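
Usage sketch (not part of the commit): how load_config and the attribute access of Config are meant to be used. The YAML file names are hypothetical, and the import path assumes the script runs next to config.py.

# Hypothetical usage of load_config; "config.yaml" and "default.yaml"
# are assumed file names, not files from this commit.
from config import load_config

cfg = load_config("config.yaml", default_path="default.yaml")
print(cfg.model_name)     # top-level keys come back as attributes
print(cfg.global_pool.p)  # nested dicts are re-wrapped in Config, so dotted access nests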
configuration_cetacean_classifier.py
ADDED
@@ -0,0 +1,12 @@
from transformers import PretrainedConfig
from typing import List


class CetaceanClassifierConfig(PretrainedConfig):
    model_type = "cetaceanet"

    def __init__(
        self,
        **kwargs,
    ):
        super().__init__(**kwargs)
metric_learning.py
ADDED
@@ -0,0 +1,59 @@
import math

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class GeM(nn.Module):
    # Generalized mean pooling over the spatial dims (p=1: average, p -> inf: max).
    def __init__(self, p=3, eps=1e-6, requires_grad=False):
        super().__init__()
        self.p = nn.Parameter(torch.ones(1) * p, requires_grad=requires_grad)
        self.eps = eps

    def forward(self, x: torch.Tensor):
        return x.clamp(min=self.eps).pow(self.p).mean((-2, -1)).pow(1.0 / self.p)


# Copied and modified from
# https://github.com/ChristofHenkel/kaggle-landmark-2021-1st-place/blob/034a7d8665bb4696981698348c9370f2d4e61e35/models/ch_mdl_dolg_efficientnet.py
class ArcMarginProductSubcenter(nn.Module):
    def __init__(self, in_features: int, out_features: int, k: int = 3):
        super().__init__()
        self.weight = nn.Parameter(torch.FloatTensor(out_features * k, in_features))
        self.reset_parameters()
        self.k = k
        self.out_features = out_features

    def reset_parameters(self):
        stdv = 1.0 / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)

    def forward(self, features: torch.Tensor) -> torch.Tensor:
        # cosine similarity to k sub-centers per class; keep the best sub-center
        cosine_all = F.linear(F.normalize(features), F.normalize(self.weight))
        cosine_all = cosine_all.view(-1, self.out_features, self.k)
        cosine, _ = torch.max(cosine_all, dim=2)
        return cosine


class ArcFaceLossAdaptiveMargin(nn.modules.Module):
    # NOTE: the margin tensors below are created with .cuda(), so this loss assumes a GPU.
    def __init__(self, margins: np.ndarray, n_classes: int, s: float = 30.0):
        super().__init__()
        self.s = s
        self.margins = margins
        self.out_dim = n_classes

    def forward(self, logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        ms = self.margins[labels.cpu().numpy()]
        cos_m = torch.from_numpy(np.cos(ms)).float().cuda()
        sin_m = torch.from_numpy(np.sin(ms)).float().cuda()
        th = torch.from_numpy(np.cos(math.pi - ms)).float().cuda()
        mm = torch.from_numpy(np.sin(math.pi - ms) * ms).float().cuda()
        labels = F.one_hot(labels, self.out_dim).float()
        logits = logits.float()
        cosine = logits
        sine = torch.sqrt(1.0 - torch.pow(cosine, 2))
        # shift the target logit by the per-class angular margin: cos(theta + m)
        phi = cosine * cos_m.view(-1, 1) - sine * sin_m.view(-1, 1)
        phi = torch.where(cosine > th.view(-1, 1), phi, cosine - mm.view(-1, 1))
        return ((labels * phi) + ((1.0 - labels) * cosine)) * self.s
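
Shape sanity check (illustrative tensors, not from the commit): GeM pools a (B, C, H, W) feature map down to (B, C), and the sub-center head returns one cosine logit per class.

import torch

pool = GeM(p=3)
head = ArcMarginProductSubcenter(in_features=512, out_features=26, k=3)

feature_map = torch.rand(4, 512, 12, 12)  # B, C, H, W
pooled = pool(feature_map)                # -> (4, 512)
cosine = head(pooled)                     # -> (4, 26): max over the 3 sub-centers
assert pooled.shape == (4, 512) and cosine.shape == (4, 26)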
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:648dd257e82e5d02a1e649cfb3193554c096e40e520f903efe150d746ddd70fa
 size 296028464
modeling_cetacean_classifier.py
ADDED
@@ -0,0 +1,92 @@
import albumentations as A
from transformers import PreTrainedModel
# from PIL import Image
import numpy as np
import torch
import cv2

from .train import SphereClassifier
from .configuration_cetacean_classifier import CetaceanClassifierConfig


WHALE_CLASSES = np.array(
    [
        "beluga",
        "blue_whale",
        "bottlenose_dolphin",
        "brydes_whale",
        "commersons_dolphin",
        "common_dolphin",
        "cuviers_beaked_whale",
        "dusky_dolphin",
        "false_killer_whale",
        "fin_whale",
        "frasiers_dolphin",
        "gray_whale",
        "humpback_whale",
        "killer_whale",
        "long_finned_pilot_whale",
        "melon_headed_whale",
        "minke_whale",
        "pantropic_spotted_dolphin",
        "pygmy_killer_whale",
        "rough_toothed_dolphin",
        "sei_whale",
        "short_finned_pilot_whale",
        "southern_right_whale",
        "spinner_dolphin",
        "spotted_dolphin",
        "white_sided_dolphin",
    ]
)


class CetaceanClassifierModelForImageClassification(PreTrainedModel):
    config_class = CetaceanClassifierConfig

    def __init__(self, config):
        super().__init__(config)

        self.model = SphereClassifier(cfg=config.to_dict())

        # load_from_checkpoint("cetacean_classifier/last.ckpt")
        # self.model = SphereClassifier.load_from_checkpoint("cetacean_classifier/last.ckpt")

        self.model.eval()
        self.config = config
        self.transforms = self.make_transforms(data_aug=True)

    def make_transforms(self, data_aug: bool):
        augments = []
        if data_aug:
            aug = self.config.aug
            augments = [
                A.RandomResizedCrop(
                    self.config.image_size[0],
                    self.config.image_size[1],
                    scale=(aug["crop_scale"], 1.0),
                    ratio=(aug["crop_l"], aug["crop_r"]),
                ),
            ]
        return A.Compose(augments)

    def preprocess_image(self, img) -> torch.Tensor:
        # expects a BGR array, as returned by cv2.imread
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(rgb, self.config.image_size, interpolation=cv2.INTER_CUBIC)
        image = self.transforms(image=image)["image"]
        return torch.Tensor(image).transpose(2, 0).unsqueeze(0)
        # image_resized = img.resize((480, 480))
        # image_resized = np.array(image_resized)[None]
        # image_resized = np.transpose(image_resized, [0, 3, 2, 1])
        # image_tensor = torch.Tensor(image_resized)
        # return image_tensor

    def forward(self, img, labels=None):
        tensor = self.preprocess_image(img)
        head_id_logits, head_species_logits = self.model(tensor)
        head_species_logits = head_species_logits.detach().numpy()
        sorted_idx = head_species_logits.argsort()[0]
        sorted_idx = np.array(list(reversed(sorted_idx)))
        top_three_logits = sorted_idx[:3]
        top_three_whale_preds = WHALE_CLASSES[top_three_logits]

        return {"predictions": top_three_whale_preds}
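
Hedged inference sketch: forward() takes a raw BGR array and returns the top-3 species names. The repo id and image path below are placeholders, not values from this commit.

import cv2
from transformers import AutoModelForImageClassification

# "user/repo-id" and "whale.jpg" are placeholders.
model = AutoModelForImageClassification.from_pretrained(
    "user/repo-id", trust_remote_code=True
)
img = cv2.imread("whale.jpg")     # BGR array, as preprocess_image expects
print(model(img)["predictions"])  # top-3 species names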
train.py
ADDED
@@ -0,0 +1,150 @@
from typing import Dict, List, Optional, Tuple

import numpy as np
import timm
import torch
from pytorch_lightning import LightningDataModule, LightningModule, Trainer

from .config import Config, load_config
# from .dataset import WhaleDataset, load_df
from .metric_learning import ArcFaceLossAdaptiveMargin, ArcMarginProductSubcenter, GeM
from .utils import WarmupCosineLambda, map_dict, topk_average_precision


class SphereClassifier(LightningModule):
    def __init__(self, cfg: dict, id_class_nums=None, species_class_nums=None):
        super().__init__()
        # import pdb; pdb.set_trace()
        if not isinstance(cfg, Config):
            cfg = Config(cfg)
        self.save_hyperparameters(cfg, ignore=["id_class_nums", "species_class_nums"])
        self.test_results_fp = None

        # import json
        # cfg_json = json.dumps(cfg)
        # with open("config_extracted.json", "w") as file:
        #     file.write(cfg_json)

        # NN architecture
        self.backbone = timm.create_model(
            cfg.model_name,
            in_chans=3,
            pretrained=cfg.pretrained,
            num_classes=0,
            features_only=True,
            out_indices=cfg.out_indices,
        )
        feature_dims = self.backbone.feature_info.channels()
        print(f"feature dims: {feature_dims}")
        self.global_pools = torch.nn.ModuleList(
            [GeM(p=cfg.global_pool.p, requires_grad=cfg.global_pool.train) for _ in cfg.out_indices]
        )
        self.mid_features = np.sum(feature_dims)
        if cfg.normalization == "batchnorm":
            self.neck = torch.nn.BatchNorm1d(self.mid_features)
        elif cfg.normalization == "layernorm":
            self.neck = torch.nn.LayerNorm(self.mid_features)
        self.head_id = ArcMarginProductSubcenter(self.mid_features, cfg.num_classes, cfg.n_center_id)
        self.head_species = ArcMarginProductSubcenter(self.mid_features, cfg.num_species_classes, cfg.n_center_species)
        if id_class_nums is not None and species_class_nums is not None:
            margins_id = np.power(id_class_nums, cfg.margin_power_id) * cfg.margin_coef_id + cfg.margin_cons_id
            margins_species = (
                np.power(species_class_nums, cfg.margin_power_species) * cfg.margin_coef_species
                + cfg.margin_cons_species
            )
            print("margins_id", margins_id)
            print("margins_species", margins_species)
            self.margin_fn_id = ArcFaceLossAdaptiveMargin(margins_id, cfg.num_classes, cfg.s_id)
            self.margin_fn_species = ArcFaceLossAdaptiveMargin(margins_species, cfg.num_species_classes, cfg.s_species)
            self.loss_fn_id = torch.nn.CrossEntropyLoss()
            self.loss_fn_species = torch.nn.CrossEntropyLoss()

    def get_feat(self, x: torch.Tensor) -> torch.Tensor:
        ms = self.backbone(x)
        h = torch.cat([global_pool(m) for m, global_pool in zip(ms, self.global_pools)], dim=1)
        return self.neck(h)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        feat = self.get_feat(x)
        return self.head_id(feat), self.head_species(feat)

    def training_step(self, batch, batch_idx):
        x, ids, species = batch["image"], batch["label"], batch["label_species"]
        logits_ids, logits_species = self(x)
        margin_logits_ids = self.margin_fn_id(logits_ids, ids)
        loss_ids = self.loss_fn_id(margin_logits_ids, ids)
        loss_species = self.loss_fn_species(self.margin_fn_species(logits_species, species), species)
        self.log_dict({"train/loss_ids": loss_ids.detach()}, on_step=False, on_epoch=True)
        self.log_dict({"train/loss_species": loss_species.detach()}, on_step=False, on_epoch=True)
        with torch.no_grad():
            self.log_dict(map_dict(logits_ids, ids, "train"), on_step=False, on_epoch=True)
            self.log_dict(
                {"train/acc_species": topk_average_precision(logits_species, species, 1).mean().detach()},
                on_step=False,
                on_epoch=True,
            )
        return loss_ids * self.hparams.loss_id_ratio + loss_species * (1 - self.hparams.loss_id_ratio)

    def validation_step(self, batch, batch_idx):
        x, ids, species = batch["image"], batch["label"], batch["label_species"]
        out1, out_species1 = self(x)
        out2, out_species2 = self(x.flip(3))
        output, output_species = (out1 + out2) / 2, (out_species1 + out_species2) / 2
        self.log_dict(map_dict(output, ids, "val"), on_step=False, on_epoch=True)
        self.log_dict(
            {"val/acc_species": topk_average_precision(output_species, species, 1).mean().detach()},
            on_step=False,
            on_epoch=True,
        )

    def configure_optimizers(self):
        backbone_params = list(self.backbone.parameters()) + list(self.global_pools.parameters())
        head_params = (
            list(self.neck.parameters()) + list(self.head_id.parameters()) + list(self.head_species.parameters())
        )
        params = [
            {"params": backbone_params, "lr": self.hparams.lr_backbone},
            {"params": head_params, "lr": self.hparams.lr_head},
        ]
        if self.hparams.optimizer == "Adam":
            optimizer = torch.optim.Adam(params)
        elif self.hparams.optimizer == "AdamW":
            optimizer = torch.optim.AdamW(params)
        elif self.hparams.optimizer == "RAdam":
            optimizer = torch.optim.RAdam(params)

        warmup_steps = self.hparams.max_epochs * self.hparams.warmup_steps_ratio
        cycle_steps = self.hparams.max_epochs - warmup_steps
        lr_lambda = WarmupCosineLambda(warmup_steps, cycle_steps, self.hparams.lr_decay_scale)
        scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)
        return [optimizer], [scheduler]

    def test_step(self, batch, batch_idx):
        x = batch["image"]
        feat1 = self.get_feat(x)
        out1, out_species1 = self.head_id(feat1), self.head_species(feat1)
        feat2 = self.get_feat(x.flip(3))
        out2, out_species2 = self.head_id(feat2), self.head_species(feat2)
        pred_logit, pred_idx = ((out1 + out2) / 2).cpu().sort(descending=True)
        return {
            "original_index": batch["original_index"],
            "label": batch["label"],
            "label_species": batch["label_species"],
            "pred_logit": pred_logit[:, :1000],
            "pred_idx": pred_idx[:, :1000],
            "pred_species": ((out_species1 + out_species2) / 2).cpu(),
            "embed_features1": feat1.cpu(),
            "embed_features2": feat2.cpu(),
        }

    def test_epoch_end(self, outputs: List[Dict[str, torch.Tensor]]):
        outputs = self.all_gather(outputs)
        if self.trainer.global_rank == 0:
            epoch_results: Dict[str, np.ndarray] = {}
            for key in outputs[0].keys():
                if torch.cuda.device_count() > 1:
                    result = torch.cat([x[key] for x in outputs], dim=1).flatten(end_dim=1)
                else:
                    result = torch.cat([x[key] for x in outputs], dim=0)
                epoch_results[key] = result.detach().cpu().numpy()
            np.savez_compressed(self.test_results_fp, **epoch_results)
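
Illustrative construction sketch: every config value below is an assumed placeholder, not the configuration shipped with this commit. With id_class_nums/species_class_nums left as None, the margin-loss setup is skipped, so the module can be built for inference only.

import torch

cfg = {
    "model_name": "efficientnet_b0",  # any timm backbone that supports features_only
    "pretrained": False,
    "out_indices": (4,),
    "global_pool": {"p": 3, "train": False},
    "normalization": "batchnorm",
    "num_classes": 15587,             # individual-id count (placeholder)
    "num_species_classes": 26,        # matches len(WHALE_CLASSES)
    "n_center_id": 3,
    "n_center_species": 3,
}
model = SphereClassifier(cfg)
logits_id, logits_species = model(torch.randn(2, 3, 224, 224))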
utils.py
ADDED
@@ -0,0 +1,41 @@
import math
from typing import Optional

import torch


class WarmupCosineLambda:
    def __init__(self, warmup_steps: int, cycle_steps: int, decay_scale: float, exponential_warmup: bool = False):
        self.warmup_steps = warmup_steps
        self.cycle_steps = cycle_steps
        self.decay_scale = decay_scale
        self.exponential_warmup = exponential_warmup

    def __call__(self, epoch: int):
        if epoch < self.warmup_steps:
            if self.exponential_warmup:
                return self.decay_scale * pow(self.decay_scale, -epoch / self.warmup_steps)
            ratio = epoch / self.warmup_steps
        else:
            ratio = (1 + math.cos(math.pi * (epoch - self.warmup_steps) / self.cycle_steps)) / 2
        return self.decay_scale + (1 - self.decay_scale) * ratio


def topk_average_precision(output: torch.Tensor, y: torch.Tensor, k: int):
    score_array = torch.tensor([1.0 / i for i in range(1, k + 1)], device=output.device)
    topk = output.topk(k)[1]
    match_mat = topk == y[:, None].expand(topk.shape)
    return (match_mat * score_array).sum(dim=1)


def calc_map5(output: torch.Tensor, y: torch.Tensor, threshold: Optional[float]):
    if threshold is not None:
        output = torch.cat([output, torch.full((output.shape[0], 1), threshold, device=output.device)], dim=1)
    return topk_average_precision(output, y, 5).mean().detach()


def map_dict(output: torch.Tensor, y: torch.Tensor, prefix: str):
    d = {f"{prefix}/acc": topk_average_precision(output, y, 1).mean().detach()}
    for threshold in [None, 0.3, 0.4, 0.5, 0.6, 0.7]:
        d[f"{prefix}/map{threshold}"] = calc_map5(output, y, threshold)
    return d
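
Quick numeric check of the schedule (assumed hyperparameters): the multiplier starts at decay_scale, ramps linearly to 1.0 over the warmup, then follows a half-cosine back down to decay_scale at the end of the cycle.

lr_lambda = WarmupCosineLambda(warmup_steps=5, cycle_steps=45, decay_scale=0.01)
print(lr_lambda(0))   # 0.01  (decay_scale floor at epoch 0)
print(lr_lambda(5))   # 1.0   (warmup just finished, cosine at its peak)
print(lr_lambda(50))  # 0.01  (end of the cosine cycle)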