# File size: 3,068 Bytes
# 49bceed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))

import timm
import torch
import torch.nn as nn
from transformers import CLIPModel as CLIPTransformersModel

from utils import configs
from utils.functional import check_data_type_variable, get_device


class CLIPModel(nn.Module):
    """Image encoder built from the vision side of a Hugging Face CLIP checkpoint.

    Only the ``vision_model`` and ``visual_projection`` submodules of the
    loaded CLIP model are retained; ``forward`` feeds the vision model's
    ``pooler_output`` through the projection.

    Args:
        model_clip_name: Checkpoint identifier handed to ``from_pretrained``.
            Must equal ``configs.CLIP_NAME_MODEL``.
        freeze_model: When ``True`` the retained parameters get
            ``requires_grad = False``; when ``False`` they stay trainable.
        pretrained_model: When ``False``, direct children of the loaded model
            that expose ``reset_parameters`` are re-initialised.

    Raises:
        ValueError: If ``model_clip_name`` differs from
            ``configs.CLIP_NAME_MODEL``.
    """

    def __init__(
        self, model_clip_name: str, freeze_model: bool, pretrained_model: bool
    ):
        super().__init__()
        self.model_clip_name = model_clip_name
        self.freeze_model = freeze_model
        self.pretrained_model = pretrained_model
        self.device = get_device()

        self.check_arguments()
        self.init_model()

    def check_arguments(self):
        """Validate constructor arguments.

        Types are checked through ``check_data_type_variable`` (presumably it
        raises on a mismatch -- confirm in ``utils.functional``).

        Raises:
            ValueError: If the model name is not ``configs.CLIP_NAME_MODEL``.
        """
        check_data_type_variable(self.model_clip_name, str)
        check_data_type_variable(self.freeze_model, bool)
        check_data_type_variable(self.pretrained_model, bool)

        if self.model_clip_name != configs.CLIP_NAME_MODEL:
            raise ValueError(
                f"Model clip name must be {configs.CLIP_NAME_MODEL}, but it is {self.model_clip_name}"
            )

    def init_model(self):
        """Load the CLIP checkpoint and keep its vision tower and projection."""
        clip_model = CLIPTransformersModel.from_pretrained(self.model_clip_name)

        if not self.pretrained_model:
            # NOTE(review): only *direct* children are visited, so nested
            # layers inside the vision transformer keep their loaded weights.
            # Confirm whether a full re-initialisation was intended.
            for layer in clip_model.children():
                if hasattr(layer, "reset_parameters"):
                    layer.reset_parameters()

        self.vision_model = clip_model.vision_model.to(self.device)
        self.visual_projection = clip_model.visual_projection.to(self.device)
        self._apply_freeze()

    def _apply_freeze(self):
        """Set ``requires_grad`` on the retained modules from ``freeze_model``."""
        # The previous code wrote to a misspelled attribute (``required_grad``)
        # with an inverted flag, so nothing was ever frozen.
        trainable = not self.freeze_model
        for module in (self.vision_model, self.visual_projection):
            for param in module.parameters():
                param.requires_grad = trainable

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the visual projection of the vision model's pooled output.

        Args:
            x: Input passed straight to the CLIP vision model (assumed to be a
                batch of preprocessed pixel values -- TODO confirm with caller).

        Returns:
            The tensor produced by ``visual_projection``.
        """
        vision_output = self.vision_model(x)
        return self.visual_projection(vision_output.pooler_output)


class TorchModel(nn.Module):
    """Backbone wrapper around a model created with ``timm.create_model``.

    The backbone is created with ``num_classes=0`` (timm's option for building
    the model without a classification head), so ``forward`` returns whatever
    the headless backbone outputs.

    Args:
        name_model: Name passed to ``timm.create_model``; must be a key of
            ``configs.NAME_MODELS``.
        freeze_model: When ``True`` the backbone parameters get
            ``requires_grad = False``; when ``False`` they stay trainable.
        pretrained_model: Forwarded to ``timm.create_model`` as ``pretrained``.

    Raises:
        ValueError: If ``name_model`` is not a key of ``configs.NAME_MODELS``.
    """

    def __init__(self, name_model: str, freeze_model: bool, pretrained_model: bool):
        super().__init__()
        self.name_model = name_model
        self.freeze_model = freeze_model
        self.pretrained_model = pretrained_model
        self.device = get_device()

        self.check_arguments()
        self.init_model()

    def check_arguments(self):
        """Validate constructor arguments.

        Types are checked through ``check_data_type_variable`` (presumably it
        raises on a mismatch -- confirm in ``utils.functional``).

        Raises:
            ValueError: If the model name is not a key of
                ``configs.NAME_MODELS``.
        """
        check_data_type_variable(self.name_model, str)
        check_data_type_variable(self.freeze_model, bool)
        check_data_type_variable(self.pretrained_model, bool)

        # Build the tuple once; it is used for the test and the message.
        valid_names = tuple(configs.NAME_MODELS.keys())
        if self.name_model not in valid_names:
            raise ValueError(
                f"Name model must be in {valid_names}, but it is {self.name_model}"
            )

    def init_model(self):
        """Create the timm backbone on the target device and apply freezing."""
        self.model = timm.create_model(
            self.name_model, pretrained=self.pretrained_model, num_classes=0
        ).to(self.device)
        self._apply_freeze()

    def _apply_freeze(self):
        """Set ``requires_grad`` on the backbone parameters from ``freeze_model``."""
        # The previous code wrote to a misspelled attribute (``required_grad``)
        # with an inverted flag, so nothing was ever frozen.
        trainable = not self.freeze_model
        for param in self.model.parameters():
            param.requires_grad = trainable

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the backbone and return its output."""
        return self.model(x)