File size: 1,561 Bytes
f01c2b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
from transformers import PretrainedConfig


class ApolloConfig(PretrainedConfig):
    """Configuration for Apollo multimodal (video-language) models.

    Bundles the sub-configurations of the three model components — the
    language model (``llm_cfg``), the vision tower (``vision_tower_cfg``),
    and the multimodal connector (``mm_connector_cfg``) — together with
    preprocessing and training options. All extra keyword arguments are
    forwarded to :class:`transformers.PretrainedConfig`.

    Args:
        llm_cfg: Config (dict or path) for the underlying language model.
        vision_tower_cfg: Config for the vision encoder.
        mm_connector_cfg: Config for the vision-to-LLM connector module.
        architectures: Model class names, as in standard HF configs.
        resume_path: Checkpoint path to resume from.
        image_aspect_ratio: Aspect-ratio handling mode for input images.
        num_video_frames: Number of frames sampled per video clip.
        mm_vision_select_layer: Which vision-tower layer to take features from.
        mm_vision_select_feature: Which feature type to select (e.g. patch/cls).
        use_mm_start_end: Whether to wrap multimodal tokens with start/end tokens.
        use_mm_patch_token: Whether to use a dedicated image-patch token.
        mm_connector_lr: Optional learning rate override for the connector.
        vision_resolution: Input resolution for the vision tower.
        interpolate_mode: Interpolation mode for positional-embedding resizing.
        clip_duration: Duration (seconds) of each sampled video clip.
        vocab_size: Tokenizer vocabulary size.
        auto_map: HF ``auto_map`` entries for custom-code loading.
    """

    model_type = "apollo"

    def __init__(
        self,
        llm_cfg=None,
        vision_tower_cfg=None,
        mm_connector_cfg=None,
        architectures=None,
        resume_path=None,
        image_aspect_ratio=None,
        num_video_frames=None,
        mm_vision_select_layer=None,
        mm_vision_select_feature=None,
        use_mm_start_end=False,
        use_mm_patch_token=True,
        mm_connector_lr=None,
        vision_resolution=None,
        interpolate_mode=None,
        clip_duration=None,
        vocab_size=None,
        auto_map=None,
        **kwargs,
    ):
        # Let the base class consume any standard HF config kwargs first.
        super().__init__(**kwargs)

        self.architectures = architectures
        self.llm_cfg = llm_cfg
        self.vision_tower_cfg = vision_tower_cfg
        self.mm_connector_cfg = mm_connector_cfg
        self.resume_path = resume_path
        self.image_aspect_ratio = image_aspect_ratio
        self.num_video_frames = num_video_frames
        self.mm_vision_select_layer = mm_vision_select_layer
        self.mm_vision_select_feature = mm_vision_select_feature
        self.use_mm_start_end = use_mm_start_end
        self.use_mm_patch_token = use_mm_patch_token
        self.mm_connector_lr = mm_connector_lr
        self.vision_resolution = vision_resolution
        self.interpolate_mode = interpolate_mode
        self.clip_duration = clip_duration
        self.vocab_size = vocab_size
        self.auto_map = auto_map