abetlen committed
Commit 4242aac
Parent: 9ad48d8

Update convert script

Files changed (1)
  1. convert_image_gguf.py +79 -26
--- a/convert_image_gguf.py
+++ b/convert_image_gguf.py
@@ -133,16 +133,20 @@ def main():
     fout.add_array("clip.vision.image_mean", [0.48145466, 0.4578275, 0.40821073])
     fout.add_array("clip.vision.image_std", [0.26862954, 0.26130258, 0.27577711])

+    fout.add_bool("clip.use_gelu", clip_vision_config["hidden_act"] != "quick_gelu")
+
     # Vision model tensors
     prefix = "model.vision_embed_tokens.img_processor.vision_model."

     fout.add_tensor(
         "v.class_embd",
-        tensors.get_tensor(f"{prefix}embeddings.class_embedding").astype(np.float16),
+        tensors.get_tensor(f"{prefix}embeddings.class_embedding").astype(np.float32),
     )
     fout.add_tensor(
         "v.patch_embd.weight",
-        tensors.get_tensor(f"{prefix}embeddings.patch_embedding.weight").reshape(1024, 3, 14, 14).astype(np.float16),
+        tensors.get_tensor(f"{prefix}embeddings.patch_embedding.weight")
+        .reshape(clip_vision_config["hidden_size"], 3, clip_vision_config["patch_size"], clip_vision_config["patch_size"])
+        .astype(np.float16),
     )
     fout.add_tensor(
         "v.position_embd.weight",
@@ -158,81 +162,130 @@ def main():
         tensors.get_tensor("model.vision_embed_tokens.glb_GN").astype(np.float32),
     )

+    fout.add_tensor(
+        "mm.0.weight",
+        tensors.get_tensor("model.vision_embed_tokens.img_projection.0.weight").astype(np.float16),
+    )
+    fout.add_tensor(
+        "mm.0.bias",
+        tensors.get_tensor("model.vision_embed_tokens.img_projection.0.bias").astype(np.float32),
+    )
+
+    fout.add_tensor(
+        "mm.2.weight",
+        tensors.get_tensor("model.vision_embed_tokens.img_projection.2.weight").astype(np.float16),
+    )
+    fout.add_tensor(
+        "mm.2.bias",
+        tensors.get_tensor("model.vision_embed_tokens.img_projection.2.bias").astype(np.float32),
+    )
+
     for i in range(clip_vision_config["num_hidden_layers"]):
-        # layer norm
+        # attention norm
         fout.add_tensor(
-            f"blk.{i}.attn_norm.weight",
+            f"v.blk.{i}.attn_norm.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.weight").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_norm.bias",
+            f"v.blk.{i}.attn_norm.bias",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.bias").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.ffn_norm.weight",
+            f"v.blk.{i}.ffn_norm.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.weight").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.ffn_norm.bias",
+            f"v.blk.{i}.ffn_norm.bias",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.bias").astype(np.float32),
         )

         # feed forward
         fout.add_tensor(
-            f"blk.{i}.ffn_down.weight",
+            f"v.blk.{i}.ffn_down.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc1.weight").astype(np.float16),
         )
         fout.add_tensor(
-            f"blk.{i}.ffn_down.bias",
-            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc1.bias").astype(np.float16),
+            f"v.blk.{i}.ffn_down.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc1.bias").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.ffn_up.weight",
+            f"v.blk.{i}.ffn_up.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc2.weight").astype(np.float16),
         )
         fout.add_tensor(
-            f"blk.{i}.ffn_up.bias",
-            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc2.bias").astype(np.float16),
+            f"v.blk.{i}.ffn_up.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.mlp.fc2.bias").astype(np.float32),
         )

         # attention
         fout.add_tensor(
-            f"blk.{i}.attn_k.weight",
+            f"v.blk.{i}.attn_k.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.k_proj.weight").astype(np.float16),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_k.bias",
-            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.k_proj.bias").astype(np.float16),
+            f"v.blk.{i}.attn_k.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.k_proj.bias").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_output.weight",
+            f"v.blk.{i}.attn_out.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.out_proj.weight").astype(np.float16),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_output.bias",
-            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.out_proj.bias").astype(np.float16),
+            f"v.blk.{i}.attn_out.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.out_proj.bias").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_q.weight",
+            f"v.blk.{i}.attn_q.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.q_proj.weight").astype(np.float16),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_q.bias",
-            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.q_proj.bias").astype(np.float16),
+            f"v.blk.{i}.attn_q.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.q_proj.bias").astype(np.float32),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_v.weight",
+            f"v.blk.{i}.attn_v.weight",
             tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.v_proj.weight").astype(np.float16),
         )
         fout.add_tensor(
-            f"blk.{i}.attn_v.bias",
-            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.v_proj.bias").astype(np.float16),
+            f"v.blk.{i}.attn_v.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.self_attn.v_proj.bias").astype(np.float32),
+        )
+
+        # layer norm
+        fout.add_tensor(
+            f"v.blk.{i}.ln1.weight",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.weight").astype(np.float32),
+        )
+        fout.add_tensor(
+            f"v.blk.{i}.ln1.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm1.bias").astype(np.float32),
+        )
+        fout.add_tensor(
+            f"v.blk.{i}.ln2.weight",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.weight").astype(np.float32),
+        )
+        fout.add_tensor(
+            f"v.blk.{i}.ln2.bias",
+            tensors.get_tensor(f"{prefix}encoder.layers.{i}.layer_norm2.bias").astype(np.float32),
         )

     fout.add_tensor(
-        "output_norm.weight",
+        "v.post_ln.weight",
         tensors.get_tensor(f"{prefix}post_layernorm.weight").astype(np.float32),
     )
+    fout.add_tensor(
+        "v.post_ln.bias",
+        tensors.get_tensor(f"{prefix}post_layernorm.bias").astype(np.float32),
+    )
+
+    fout.add_tensor(
+        "v.pre_ln.weight",
+        tensors.get_tensor(f"{prefix}pre_layrnorm.weight").astype(np.float32),
+    )
+    fout.add_tensor(
+        "v.pre_ln.bias",
+        tensors.get_tensor(f"{prefix}pre_layrnorm.bias").astype(np.float32),
+    )

     fout.write_header_to_file()
     fout.write_kv_data_to_file()
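
For context, the script drives gguf-py's GGUFWriter in two phases: key/value metadata first, then tensors, with large weight matrices cast to fp16 and norms and biases kept in fp32. Below is a minimal sketch of that pattern, not the full script; it assumes gguf-py (shipped with llama.cpp) and safetensors, and the file names and "clip" arch string are illustrative.

# Minimal sketch of the two-phase GGUFWriter pattern used above (an
# assumption-labeled illustration, not the actual convert_image_gguf.py).
import numpy as np
import gguf
from safetensors import safe_open

fout = gguf.GGUFWriter("vision.gguf", arch="clip")  # illustrative path/arch
with safe_open("model.safetensors", framework="np") as tensors:
    # Phase 1: key/value metadata.
    fout.add_bool("clip.use_gelu", True)
    fout.add_array("clip.vision.image_mean", [0.48145466, 0.4578275, 0.40821073])

    # Phase 2: tensors -- fp16 for large weights, fp32 for norms and biases.
    prefix = "model.vision_embed_tokens.img_processor.vision_model."
    fout.add_tensor(
        "v.post_ln.weight",
        tensors.get_tensor(f"{prefix}post_layernorm.weight").astype(np.float32),
    )

# Header, KV data, and tensor data are flushed to disk in that order.
fout.write_header_to_file()
fout.write_kv_data_to_file()
fout.write_tensors_to_file()
fout.close()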