spow12 committed on
Commit
bb0b1da
·
verified ·
1 Parent(s): b9cb771

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +4 -134
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_name_or_path": "/data2/model_weights/multimodal/mistral-community/pixtral-12b/ChatWaifu_2.0_vision",
3
- "architectures": [
4
  "LlavaForConditionalGeneration"
5
  ],
6
  "ignore_index": -100,
@@ -9,160 +9,30 @@
9
  "model_type": "llava",
10
  "projector_hidden_act": "gelu",
11
  "text_config": {
12
- "_name_or_path": "",
13
- "add_cross_attention": false,
14
- "architectures": null,
15
- "attention_dropout": 0.0,
16
- "bad_words_ids": null,
17
- "begin_suppress_tokens": null,
18
- "bos_token_id": 1,
19
- "chunk_size_feed_forward": 0,
20
- "cross_attention_hidden_size": null,
21
- "decoder_start_token_id": null,
22
- "diversity_penalty": 0.0,
23
- "do_sample": false,
24
- "early_stopping": false,
25
- "encoder_no_repeat_ngram_size": 0,
26
- "eos_token_id": 2,
27
- "exponential_decay_length_penalty": null,
28
- "finetuning_task": null,
29
- "forced_bos_token_id": null,
30
- "forced_eos_token_id": null,
31
- "head_dim": 128,
32
- "hidden_act": "silu",
33
  "hidden_size": 5120,
34
- "id2label": {
35
- "0": "LABEL_0",
36
- "1": "LABEL_1"
37
- },
38
- "initializer_range": 0.02,
39
  "intermediate_size": 14336,
40
  "is_composition": true,
41
- "is_decoder": false,
42
- "is_encoder_decoder": false,
43
- "label2id": {
44
- "LABEL_0": 0,
45
- "LABEL_1": 1
46
- },
47
- "length_penalty": 1.0,
48
- "max_length": 20,
49
  "max_position_embeddings": 1024000,
50
- "min_length": 0,
51
  "model_type": "mistral",
52
- "no_repeat_ngram_size": 0,
53
- "num_attention_heads": 32,
54
- "num_beam_groups": 1,
55
- "num_beams": 1,
56
  "num_hidden_layers": 40,
57
  "num_key_value_heads": 8,
58
- "num_return_sequences": 1,
59
- "output_attentions": false,
60
- "output_hidden_states": false,
61
- "output_scores": false,
62
- "pad_token_id": null,
63
- "prefix": null,
64
- "problem_type": null,
65
- "pruned_heads": {},
66
- "remove_invalid_values": false,
67
- "repetition_penalty": 1.0,
68
- "return_dict": true,
69
- "return_dict_in_generate": false,
70
  "rms_norm_eps": 1e-05,
71
  "rope_theta": 1000000000.0,
72
- "sep_token_id": null,
73
  "sliding_window": null,
74
- "suppress_tokens": null,
75
- "task_specific_params": null,
76
- "temperature": 1.0,
77
- "tf_legacy_loss": false,
78
- "tie_encoder_decoder": false,
79
- "tie_word_embeddings": false,
80
- "tokenizer_class": null,
81
- "top_k": 50,
82
- "top_p": 1.0,
83
- "torch_dtype": null,
84
- "torchscript": false,
85
- "typical_p": 1.0,
86
- "use_bfloat16": false,
87
- "use_cache": true,
88
  "vocab_size": 131072
89
  },
90
  "torch_dtype": "bfloat16",
91
- "transformers_version": "4.45.2",
92
  "vision_config": {
93
- "_name_or_path": "",
94
- "add_cross_attention": false,
95
- "architectures": null,
96
- "attention_dropout": 0.0,
97
- "bad_words_ids": null,
98
- "begin_suppress_tokens": null,
99
- "bos_token_id": null,
100
- "chunk_size_feed_forward": 0,
101
- "cross_attention_hidden_size": null,
102
- "decoder_start_token_id": null,
103
- "diversity_penalty": 0.0,
104
- "do_sample": false,
105
- "early_stopping": false,
106
- "encoder_no_repeat_ngram_size": 0,
107
- "eos_token_id": null,
108
- "exponential_decay_length_penalty": null,
109
- "finetuning_task": null,
110
- "forced_bos_token_id": null,
111
- "forced_eos_token_id": null,
112
  "head_dim": 64,
113
  "hidden_act": "silu",
114
- "hidden_size": 1024,
115
- "id2label": {
116
- "0": "LABEL_0",
117
- "1": "LABEL_1"
118
- },
119
  "image_size": 1024,
120
- "intermediate_size": 4096,
121
  "is_composition": true,
122
- "is_decoder": false,
123
- "is_encoder_decoder": false,
124
- "label2id": {
125
- "LABEL_0": 0,
126
- "LABEL_1": 1
127
- },
128
- "length_penalty": 1.0,
129
- "max_length": 20,
130
- "min_length": 0,
131
  "model_type": "pixtral",
132
- "no_repeat_ngram_size": 0,
133
- "num_attention_heads": 16,
134
- "num_beam_groups": 1,
135
- "num_beams": 1,
136
- "num_channels": 3,
137
- "num_hidden_layers": 24,
138
- "num_return_sequences": 1,
139
- "output_attentions": false,
140
- "output_hidden_states": false,
141
- "output_scores": false,
142
- "pad_token_id": null,
143
  "patch_size": 16,
144
- "prefix": null,
145
- "problem_type": null,
146
- "pruned_heads": {},
147
- "remove_invalid_values": false,
148
- "repetition_penalty": 1.0,
149
- "return_dict": true,
150
- "return_dict_in_generate": false,
151
  "rope_theta": 10000.0,
152
- "sep_token_id": null,
153
- "suppress_tokens": null,
154
- "task_specific_params": null,
155
- "temperature": 1.0,
156
- "tf_legacy_loss": false,
157
- "tie_encoder_decoder": false,
158
- "tie_word_embeddings": false,
159
- "tokenizer_class": null,
160
- "top_k": 50,
161
- "top_p": 1.0,
162
- "torch_dtype": null,
163
- "torchscript": false,
164
- "typical_p": 1.0,
165
- "use_bfloat16": false
166
  },
167
  "vision_feature_layer": -1,
168
  "vision_feature_select_strategy": "full"
 
1
  {
2
  "_name_or_path": "/data2/model_weights/multimodal/mistral-community/pixtral-12b/ChatWaifu_2.0_vision",
3
+ "architectures": [
4
  "LlavaForConditionalGeneration"
5
  ],
6
  "ignore_index": -100,
 
9
  "model_type": "llava",
10
  "projector_hidden_act": "gelu",
11
  "text_config": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "hidden_size": 5120,
13
+ "head_dim": 128,
 
 
 
 
14
  "intermediate_size": 14336,
15
  "is_composition": true,
 
 
 
 
 
 
 
 
16
  "max_position_embeddings": 1024000,
 
17
  "model_type": "mistral",
 
 
 
 
18
  "num_hidden_layers": 40,
19
  "num_key_value_heads": 8,
 
 
 
 
 
 
 
 
 
 
 
 
20
  "rms_norm_eps": 1e-05,
21
  "rope_theta": 1000000000.0,
 
22
  "sliding_window": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  "vocab_size": 131072
24
  },
25
  "torch_dtype": "bfloat16",
26
+ "transformers_version": null,
27
  "vision_config": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "head_dim": 64,
29
  "hidden_act": "silu",
 
 
 
 
 
30
  "image_size": 1024,
 
31
  "is_composition": true,
 
 
 
 
 
 
 
 
 
32
  "model_type": "pixtral",
 
 
 
 
 
 
 
 
 
 
 
33
  "patch_size": 16,
 
 
 
 
 
 
 
34
  "rope_theta": 10000.0,
35
+ "tie_word_embeddings": false
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  },
37
  "vision_feature_layer": -1,
38
  "vision_feature_select_strategy": "full"