appvoid committed on
Commit 1c4b065
1 Parent(s): 8511d9d

Update README.md

Files changed (1):
  1. README.md +152 -0
README.md CHANGED
@@ -268,4 +268,156 @@ Traceback (most recent call last):
  File "/teamspace/studios/this_studio/mergekit/mergekit/io/tasks.py", line 86, in execute
    raise RuntimeError(
RuntimeError: Tensor lm_head.weight required but not present in model meta-llama/Llama-3.2-1B
```
+
+ which seems odd given the following output layers from Llama-3.2-1B:
+
+ ```
+ model.embed_tokens.weight
+ model.layers.0.self_attn.q_proj.weight
+ model.layers.0.self_attn.k_proj.weight
+ model.layers.0.self_attn.v_proj.weight
+ model.layers.0.self_attn.o_proj.weight
+ model.layers.0.mlp.gate_proj.weight
+ model.layers.0.mlp.up_proj.weight
+ model.layers.0.mlp.down_proj.weight
+ model.layers.0.input_layernorm.weight
+ model.layers.0.post_attention_layernorm.weight
+ model.layers.1.self_attn.q_proj.weight
+ model.layers.1.self_attn.k_proj.weight
+ model.layers.1.self_attn.v_proj.weight
+ model.layers.1.self_attn.o_proj.weight
+ model.layers.1.mlp.gate_proj.weight
+ model.layers.1.mlp.up_proj.weight
+ model.layers.1.mlp.down_proj.weight
+ model.layers.1.input_layernorm.weight
+ model.layers.1.post_attention_layernorm.weight
+ model.layers.2.self_attn.q_proj.weight
+ model.layers.2.self_attn.k_proj.weight
+ model.layers.2.self_attn.v_proj.weight
+ model.layers.2.self_attn.o_proj.weight
+ model.layers.2.mlp.gate_proj.weight
+ model.layers.2.mlp.up_proj.weight
+ model.layers.2.mlp.down_proj.weight
+ model.layers.2.input_layernorm.weight
+ model.layers.2.post_attention_layernorm.weight
+ model.layers.3.self_attn.q_proj.weight
+ model.layers.3.self_attn.k_proj.weight
+ model.layers.3.self_attn.v_proj.weight
+ model.layers.3.self_attn.o_proj.weight
+ model.layers.3.mlp.gate_proj.weight
+ model.layers.3.mlp.up_proj.weight
+ model.layers.3.mlp.down_proj.weight
+ model.layers.3.input_layernorm.weight
+ model.layers.3.post_attention_layernorm.weight
+ model.layers.4.self_attn.q_proj.weight
+ model.layers.4.self_attn.k_proj.weight
+ model.layers.4.self_attn.v_proj.weight
+ model.layers.4.self_attn.o_proj.weight
+ model.layers.4.mlp.gate_proj.weight
+ model.layers.4.mlp.up_proj.weight
+ model.layers.4.mlp.down_proj.weight
+ model.layers.4.input_layernorm.weight
+ model.layers.4.post_attention_layernorm.weight
+ model.layers.5.self_attn.q_proj.weight
+ model.layers.5.self_attn.k_proj.weight
+ model.layers.5.self_attn.v_proj.weight
+ model.layers.5.self_attn.o_proj.weight
+ model.layers.5.mlp.gate_proj.weight
+ model.layers.5.mlp.up_proj.weight
+ model.layers.5.mlp.down_proj.weight
+ model.layers.5.input_layernorm.weight
+ model.layers.5.post_attention_layernorm.weight
+ model.layers.6.self_attn.q_proj.weight
+ model.layers.6.self_attn.k_proj.weight
+ model.layers.6.self_attn.v_proj.weight
+ model.layers.6.self_attn.o_proj.weight
+ model.layers.6.mlp.gate_proj.weight
+ model.layers.6.mlp.up_proj.weight
+ model.layers.6.mlp.down_proj.weight
+ model.layers.6.input_layernorm.weight
+ model.layers.6.post_attention_layernorm.weight
+ model.layers.7.self_attn.q_proj.weight
+ model.layers.7.self_attn.k_proj.weight
+ model.layers.7.self_attn.v_proj.weight
+ model.layers.7.self_attn.o_proj.weight
+ model.layers.7.mlp.gate_proj.weight
+ model.layers.7.mlp.up_proj.weight
+ model.layers.7.mlp.down_proj.weight
+ model.layers.7.input_layernorm.weight
+ model.layers.7.post_attention_layernorm.weight
+ model.layers.8.self_attn.q_proj.weight
+ model.layers.8.self_attn.k_proj.weight
+ model.layers.8.self_attn.v_proj.weight
+ model.layers.8.self_attn.o_proj.weight
+ model.layers.8.mlp.gate_proj.weight
+ model.layers.8.mlp.up_proj.weight
+ model.layers.8.mlp.down_proj.weight
+ model.layers.8.input_layernorm.weight
+ model.layers.8.post_attention_layernorm.weight
+ model.layers.9.self_attn.q_proj.weight
+ model.layers.9.self_attn.k_proj.weight
+ model.layers.9.self_attn.v_proj.weight
+ model.layers.9.self_attn.o_proj.weight
+ model.layers.9.mlp.gate_proj.weight
+ model.layers.9.mlp.up_proj.weight
+ model.layers.9.mlp.down_proj.weight
+ model.layers.9.input_layernorm.weight
+ model.layers.9.post_attention_layernorm.weight
+ model.layers.10.self_attn.q_proj.weight
+ model.layers.10.self_attn.k_proj.weight
+ model.layers.10.self_attn.v_proj.weight
+ model.layers.10.self_attn.o_proj.weight
+ model.layers.10.mlp.gate_proj.weight
+ model.layers.10.mlp.up_proj.weight
+ model.layers.10.mlp.down_proj.weight
+ model.layers.10.input_layernorm.weight
+ model.layers.10.post_attention_layernorm.weight
+ model.layers.11.self_attn.q_proj.weight
+ model.layers.11.self_attn.k_proj.weight
+ model.layers.11.self_attn.v_proj.weight
+ model.layers.11.self_attn.o_proj.weight
+ model.layers.11.mlp.gate_proj.weight
+ model.layers.11.mlp.up_proj.weight
+ model.layers.11.mlp.down_proj.weight
+ model.layers.11.input_layernorm.weight
+ model.layers.11.post_attention_layernorm.weight
+ model.layers.12.self_attn.q_proj.weight
+ model.layers.12.self_attn.k_proj.weight
+ model.layers.12.self_attn.v_proj.weight
+ model.layers.12.self_attn.o_proj.weight
+ model.layers.12.mlp.gate_proj.weight
+ model.layers.12.mlp.up_proj.weight
+ model.layers.12.mlp.down_proj.weight
+ model.layers.12.input_layernorm.weight
+ model.layers.12.post_attention_layernorm.weight
+ model.layers.13.self_attn.q_proj.weight
+ model.layers.13.self_attn.k_proj.weight
+ model.layers.13.self_attn.v_proj.weight
+ model.layers.13.self_attn.o_proj.weight
+ model.layers.13.mlp.gate_proj.weight
+ model.layers.13.mlp.up_proj.weight
+ model.layers.13.mlp.down_proj.weight
+ model.layers.13.input_layernorm.weight
+ model.layers.13.post_attention_layernorm.weight
+ model.layers.14.self_attn.q_proj.weight
+ model.layers.14.self_attn.k_proj.weight
+ model.layers.14.self_attn.v_proj.weight
+ model.layers.14.self_attn.o_proj.weight
+ model.layers.14.mlp.gate_proj.weight
+ model.layers.14.mlp.up_proj.weight
+ model.layers.14.mlp.down_proj.weight
+ model.layers.14.input_layernorm.weight
+ model.layers.14.post_attention_layernorm.weight
+ model.layers.15.self_attn.q_proj.weight
+ model.layers.15.self_attn.k_proj.weight
+ model.layers.15.self_attn.v_proj.weight
+ model.layers.15.self_attn.o_proj.weight
+ model.layers.15.mlp.gate_proj.weight
+ model.layers.15.mlp.up_proj.weight
+ model.layers.15.mlp.down_proj.weight
+ model.layers.15.input_layernorm.weight
+ model.layers.15.post_attention_layernorm.weight
+ model.norm.weight
+ lm_head.weight
+ ```
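
A minimal sketch of one way a listing like the one above can be produced, and of how to compare it against the tensors actually serialized in the checkpoint; this is not the author's script. The repo id matches the one in the error message, while the single-shard filename `model.safetensors` and access to the gated meta-llama repo are assumptions:

```python
# Sketch (assumptions noted below), not the reporter's original code.
from huggingface_hub import hf_hub_download
from safetensors import safe_open
from transformers import AutoModelForCausalLM

repo_id = "meta-llama/Llama-3.2-1B"  # gated repo; assumes you have access

# In-memory view: state-dict keys of the loaded model. With tied embeddings,
# lm_head.weight appears here even when it is only an alias of
# model.embed_tokens.weight.
model = AutoModelForCausalLM.from_pretrained(repo_id)
in_memory = set(model.state_dict().keys())

# On-disk view: tensor names actually stored in the checkpoint file.
path = hf_hub_download(repo_id, "model.safetensors")  # filename is an assumption
with safe_open(path, framework="pt") as f:
    on_disk = set(f.keys())

print("in memory but not on disk:", sorted(in_memory - on_disk))
```

If `lm_head.weight` shows up only on the in-memory side, the checkpoint relies on tied embeddings and the head weight exists on disk only as `model.embed_tokens.weight`, which would be consistent with mergekit reporting the tensor as missing.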