{ "metadata": { "ParamSize": 805, "ParamBytes": 36381212672.0, "BitsPerParam": 3.4891900844756822 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 1024, 128256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "efc3106ddacb59cc63dedebceab79b3b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7a25b047175f03bfb4426edb1ce0ecb2" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a7dd4303c5c606109918beb7416a9efc" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f0d4f7e4cb3e0ae34a5f9b8f0b1e8134" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0b09b9f396bb06bec3892631fa981180" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31145984, "records": [ { "name": "lm_head.q_scale", "shape": [ 64, 128256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16416768 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 16433152 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 20103168 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27443200 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27459584 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27475968 } ], "md5sum": "c6ae61d6703d66b08bf031a3b6262a77" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "723813f2f3974f9a44a774a9aae188a0" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8f615f44531870618180583bbd939329" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "a47c1e9f873f5e3ec3f2bb7b1ace4480" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4385b3eec159eb8924eb4fcd346fc255" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7ee41f1a02f5cb0a9586786924d464a3" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29835264, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 9732096 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26148864 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26165248 } ], "md5sum": "6c3e0321d7da9cf78d5bc9473187bfe3" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "100d3b723e384ff71dc7d5baff34a694" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce769fee827e0187f1428d62348f045d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "86adf68299803915e825513aa429c66a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b7702588790083e9959f7560e3e3203a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e16a6776ca6cd7bc9ce9bee44785107a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9330851564c5b6a45bc4efcb0e92ca19" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f1ce0b4dc981983feae161b7b96d7860" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "61e0102a3dcb2f9600b448545a0f9214" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "3e94a586cea10f2558f4958ba7c76508" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9b51552dd2873ce964d50ad34c9292a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6be6897ef08e0126d96929344ae49015" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5d9c0ff82af87b5bfc9cb2c43a89d807" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "737f2ffd03947bf6e230fc8bdf157b72" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6f64ca694540762d01847a2b16ac26f4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e13320dfce16756b076432cff186cbf2" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "494982793a4e7da159ffb0677243ce1c" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2bb792232247f248fe291a3ce4c17004" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c67f4e0ff153db4a67807ea677600ed7" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3b7b93c643575ecc16e08633a272447b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "00488de521497d7f71ae7748c5639f84" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5c882cb4c1adcb6715d5ea9681d5e95b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bf00c9a3eec412b76c7c5f1b22647d2e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9739d439b1cfcbf1abd5c9df6d128531" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "74fe930f6ce969cbb7904a3361651fd5" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fc5e2024849b88109f66100aae1ee196" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "62ee9a959c579342b6241ca396ffdbb6" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a3ae660a1c898cacfdad0f3f448b10d3" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ed7bd9d3070af208eca315451c84f87e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f01e8d8116cbeb2a3fa968d36be059c4" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a4113eaf66153b9465088591812761bf" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23117824 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30457856 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31768576 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32817152 } ], "md5sum": "4670426e324ed7569fbce06167017280" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "31ff5d16c61b4a40205e968370a9f6bb" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c151ab1a29e1195b84044e2e5601cd6f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e71d785527860854690e63a6e5371b59" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2d9f7a38f163eabf6e41d92065da3289" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4d91a896e7fbbac293b7fd835070af7e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1e934b4c38c1c1b2006eb2d306737309" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d2423a1f8c15e8db60f4c7f45a9dda71" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 28147712, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11042816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11059200 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11075584 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14745600 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22085632 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 22102016 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23412736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24461312 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24477696 } ], "md5sum": "73f078814b9f0e94d934d9c1ec9d99e2" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "deb4625fc31227b7a792f86c3dd80a6a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e71e79c3bcf209c2b667042dc27c295" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f3b16e5528be1922cc33d253bd82b5ef" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c428c70bd5fe919710f9500ef6dcd358" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fef3b37fdc14246d07aca57b70471478" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "22405b08bb5520300f4804b024aef9f0" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "de2908aa952bf545e3d5d547abb3cc1d" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "566c9f34f64fc322f15611ab7e829a29" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "ea21a7cb7f3fe0c54cd43da60dcc1f5b" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "861c0a52a6b6e7996e12df04ed592198" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "080e73ee31aba0cea375ed1ac3fc52c7" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0f83e3863cac2a8340e2b157b451f31b" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "155e132b863153ecde95e5d74ceced89" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9a0852967c4d28ba63e20f3f236db16f" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d7e0604112096dc11ebdbb2f23c7b3b3" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "747b72ecacd3d47062998b4dfd77c271" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8ad12ddaa12a9acc7e149a163d285657" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6aa54b0f042050a48a50f4cd7e23e37e" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "47f53a734c8a7617d45c41a76628cd10" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c68da7ed29a56e7da8ec0dc4d2d4e2d2" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2a1c6be6248f160b404341be40a121b0" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e00425978633248c40126e75ea4c9f59" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "60a529e62cd3518aeed045c35d5e4f1c" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4c29357530f11adad122a8c9e878358f" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "189ea9268029efe68aab9d52aed70d40" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cadd06b5e1d0757a9d7eb80474382cca" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b1e12e8d2380152e24a0221ff0c150ac" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "956fc551f3763fcb2b8a0e98c7669248" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e6150f7c6aaee781ae0b0602b43baad" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6a85728bff4994b7781f3d8c45b9c458" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fc8a8df9c9ef78f368b8cf2434d140ac" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "665f025ce45914f2d32c7e075046da9b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ee3fc52eb0c0d578801fa34f2db8fd5c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "251ba0b909751fa656c187671ff85e1f" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "534226dfeec6637b84edaa51b97e5db8" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e2a0e521242be622698b800fd5463e48" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9d84fa79d75cad6e86eb5accb2d718a1" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d966cf8e0ce9b2e9cf42611226ac6d85" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "bd3523819d43d60dd1b97ba4c71afa8a" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "391a2f0b29057510d328e8db987964dd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "51cfa57c49ee387d73fc6368d2ce590c" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6b7f42083b38580373939f84c80bc571" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e1002a202b73c7588207fff110639561" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ec637aff0176ae31218ff53334816477" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3e3feb1c886bce465a9f53b9b4612abb" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c0771df3cd2a821abfb52134b7d5643d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b49e500e8826b284075a3e4fcb158bef" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c5d2fd5d6be545236c63d00322b7dc27" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "0bec9e127f97ea12064155c4f0213bc4" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8de873c2ff16108956491c744f2f4e1e" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "48116c7b8c0880c7efc603a294be751a" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ca19d74e6e9718a1a88adeae04403048" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a063ee36a226e62a264fc33058b93f17" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7a3ff045dff7fdbddd42563c400f5a8e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3dd69e038c6dae678d5bf569a8b13991" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "74247eef0876806942b4b6743e2ff351" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "86df4a7b32cb69cb6c937f2ca32aa78c" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "64f6738ddfbded6bb6a2087787327795" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bbd60ba0d1d397f5167680cef9966670" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5748bb4cac59bb3bf27f8cdc35442623" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "21543d2dc0d1a78ad33f24a46b5bc798" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "840f74966a9bf3171490c57fd0cd13c0" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0905edf8ab388d195fd9af4ce94024a4" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0ca0e965419efa975adf034bf8d38c39" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2da65b2089bb7ca55f153481782ac0f1" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ee7ab63703185e55c94953bf49488353" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f33e79480f836f759bdf40d137c41cab" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "8e5270cdc799d63823a935228ce16f27" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f06d4a9755ce886b46f59cf00f0c768d" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b31d56be5978443efbef360628b8ba77" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "70fd9fcb35792d0a3ba528feea6f3272" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "392dbd8f03444fb96598203dd08bfffc" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5e9e2806d3eb42045032c19c94d06bc1" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bd9a696c1c120b1f63bfa5449660e4a0" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "958e2a20ea403ea933a3aa1ebbcf2229" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e28fc58c108d9c86faaf60359a027931" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "b0e54327c3fa09e24c0af1c8dffdf1f7" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "05f15307d968f5500d3ff7cb76993406" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ec7b8eca48051799a13290856a8b1c1b" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4a7ceecf39afe2b99ee1901cc1a01678" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dad5e6994548345ebaf551db3f181fc1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3a1e528435554da5c5e3ba544711d230" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d4c6b015169eb43c75f492af51830083" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f2654c8204285bd2033ddfb628b9b618" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0832a43e55b7eb7408a5dc6ffcf25a55" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "97c9a80df9f103f2e871a850f20b07ce" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9c4e28a491c1c46295c03f56588ece48" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "e3b5d62336c0baf1bb8c6d1566c45acd" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f3cb7bec2e1aabe9552715121abaeedc" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4211188b6dcc46d5808a0c10ec6e787f" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c5d61aaa9eec1996c5d686384d0f28ac" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f6ac4f62ad659ffc0380a354360d9037" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6fbfdc842efe45f9bad50ae3be42100b" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "89f0ed550851a865593cdf075e02a11b" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b19f5e4fe56d7e53f3a9ff21b8aaeb8c" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1f149d49e3553d7d4f7ee132bf7d3d57" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2b27cdb677536bc8c1ecd3ea9271e763" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "daf7f2a132ae893340d0ae4063b521bc" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cdac92fa29ee8bc501c922a9067e0c32" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d7da05fe9083db998b0539e66d5902b8" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9ad9139b253c237458ef748b09517bb7" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8253c4d3d0f0c01e2bb5fcc6815caf7f" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5cfb066cc3465310bcffc51619c538c6" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2ec69575c6e32a584d220c6c6ec1b355" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "39b840c448badd3ac146a881912ef42e" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0363ae21518e693a4c5c9356907459e1" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "5c254a851912172a9f90362d3ab2ebbf" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7e413d9d8afcf05592f85c783e484cd7" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bc58996c19b91891683d9144d70501b0" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9dcd34b3d21c19dbd38f696b668cc529" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "67ba6b14be125d31b679c0be492a52d7" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b0b5fab7f2c21fb98df33e0d0a85c7f1" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3b54f6f08c4445242c0d921da659d01a" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ebf73a03a212a5b34d5375f832bb6219" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eca9905f2788055a89cf0ae45be616d2" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "df2846697c5188e9a8cf9a53a450d4f8" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "30e888d5f3841c19f47996c8f7d7aa86" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c3b96ca03ff0b394486f3fa5c1647b68" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5e6db1db65bd7816df247160df09afe1" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f0790ce030aba5e1eb1af5cd91ee953a" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "81bee44918259b9b74b9dfb30fc3b5e0" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8d841cb4c37005fefde09f9a871b20ba" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73850ba289394fd739872c5458abafb5" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d37019cf27960f6e5b1fc1387c16a4ed" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5a68d4f6bdf9856f00ffb629556604a6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "127630d56bf701fbc638b10c2dc4e39d" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dba3f00df9098950ea0e8cd88e694c90" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e6f54da4b58c12bc98e6aa766fb4e2b1" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1922e6bd10a1b480bd29c0f599d21e1d" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e1faa5d7306cfa2cf9101262cfcec617" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "457ac032b83d00fce423ea8c183e6848" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6804489a95a23a6593b56ca05d46bbce" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "674d35b5f58a83f8abd64cdce208a649" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7bb6a98385aa75325bdeb15f36f3e50c" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bda229d924b08b3b89a1c236e26c1ce5" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "dcdda173dfdb9b4d322daa23a70a0e0d" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "88669cf82177da71f80d59c2c1c76840" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d99aeaf2dab6d205e7b744d51717de5e" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "23718929537acb733cb926742016ec4a" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2df3272fb3991fa48a63dab1c0fc9b30" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7afedbbc1937616cd66c62cb71c268f5" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5a6d49ec88aabb9524d25a12489e0c2" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e32bbe8b33aea5e409a2977244adaaa2" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "862018c76c1c95a18f2c54b3104a41c4" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "22e7d4217fd6016d9dae21f417ae1fd6" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "458e4fa3482e1b185d232409c359b7c7" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7f0395762e50ae39bbd24be2df16a7e2" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3eaab080f966efd54d6b70ca64620e36" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f135138b00432f4cb938985306171d10" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a9e9811d57f3bde0812c5787fda528aa" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7b69a97510ce6129521b3ac130cbcbf4" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a7bc9cb09a0fefbde89b161c796c9f73" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "593d9efaa67bc2af24fcbc2bb8fe16bd" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fa71f38aa55be451aecb975b2b709346" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "d1000c8963a0c0c5d28e3af535bd897b" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "13f6e6abda3ad8dbc9ad7171ea05a682" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c9e7cee6bee1bec25bbe4b23e9435fe6" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "252571e1502eb78521870312c63d9f14" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3102e6536b5fba837834641363cb84c4" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "affac2d14768232d851e7a9c683451e9" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "59789dcaa5c73e470558cb15832d1a58" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7876d3b931a84103b259580f606129fb" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7f5912b447b889d50ef0b5e46ffc7c05" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ff723da59e5d1a7c753d6cab7798dad8" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "865b6c7218499a6546e75e4fcd5512d3" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "6c8cf30f98568dac7f5697f08af4fdf2" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "38e5ee15b70a49ea98151b0501c3229b" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7c940e16c1714fbc720e07399c821794" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "815352f8b70dc62ed7f15749509c83e8" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f3dde271d2f524e7b95ab0bf1ee3153" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "efd8d4298a07d5b54b528a8bdd745e15" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "eb6493fe03963b2786b811c6b0b037ba" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "94c0104f30cf61f3eae1f4fe5c6833dc" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a376be6567694850d401edba249be6a5" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "3cdd6043268228a3e5b40638d71df55e" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "42a20c3b86cee38c1696c62fc7f59996" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c0e84e31ac637446b5b3a44a828bec02" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "01d4ebd83846deab678ac1b688da6ffa" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a69f69bdaea29bc0cb160334e2d65720" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ce684a3e1939a3d6b279c0eb7e3bf8e2" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "591cc99dff8bfe5b6f7b467aa279bf79" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c0f9b2fa4442a3210577ce175d33dc6" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0c0b02fe74de5fe320bbeed23e335dd8" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "154c10c145a2f89f8f6066e798c0de3d" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "695d269045254fd9fe89da2e6be9e936" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "bbde34dbe9a9b5e4553843fb2fc6df4c" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "719d801f697adff0f3b5e16aef9b434e" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "63e39f13402fd48b92b24935499606d1" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "58f625f6e5b6b0ac8b4e96374b22403b" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2623c32c8a5cc28b68af7febfff79845" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "29ef88adb377286b8901188c4de74091" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d4ccf1733ba7c6ee09a792d414611a42" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fbd253267700c6599b515e8e6a5911c5" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2aa203291f0181f929c47cb88bf2fd59" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "cc942c049ece0d0cb5318933da99490c" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4046ac49e0e5fc48afce723b66eb46ac" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bdb1207ea9c9e9b0093915262d3b94dd" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "159a5ad9069b359e7a8f4c581b0f7bfc" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "88d9dca28ed574b72a043f341315c578" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e4f5af38b58c6bc7240548bb70f37897" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "83d2f43a109935ed821b917f5d66f91e" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8721ad103834bb83b832e7c1fa6af9cf" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2aab56b97726c46b6969161619db4463" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "981055cb1b4cc8b5b939167d7ef0bc61" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "df82ff0c97890cce9d43d77c8d1b70b8" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0eccbf43337e6bda67114d0c0da85aa8" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "67720c93f3a9491a55725ddfa448a964" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c07da7072aa5f68d6543e383f9dee905" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bf695ef01bf47f2daaf9d514c2c7e496" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "77d6f16f15eeaf8332d8b416c56083d1" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a57c3001d8e7f7d3745e37ef6b9ce831" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eedba7db80ac0b68af589427f93fdbf7" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "36b3148f8ca2704baeb656853c5bd043" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e6e9c90b29aa9319b1369ca12b529d2a" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "f04c4874948a332739d45200c6864f31" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "80316b2405a54eea43ca8e0991036503" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2cbbd4915540dc68f96bd82bd6e9a1c3" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cc0623878eaedd1efaa87a770b723aa5" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "97d261d15cbd90299359207bb8162756" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2b22f2966015a4cfb00890e2879e9d61" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee46ea02b315c22ea05d3381801ac80a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "218d09b91b3094b76a8bbb64fbce5ab7" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "94f6b4931fc97f6e08fe9ef946bf02af" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "43216edb60c39f908cd84a11423c6b13" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "803031bae4e621509f87b4ac98f160c2" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "38eb1f773d069c00399f7f2a996f13f7" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "58454479a334c866af7ff767db9b9ca3" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8a05f9ab6b979d6973c8fa1c1fd98355" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "364fba33d2fbf28204db9ad61fecae53" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "af38650a9e50c8195784945abc5408e8" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f2a0b113ba64c9995b0cb122dc97911d" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e51c4cfeba127162b9ae64f37571bf8" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "90249134d4dd72bf0df326e47870511d" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6302896d2f31b856d8776288f06094b9" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "0019039f5b9ee0a1f59c59ab4a15b1e8" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "76f257d141f78bc92eea104b91c33038" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "41d48f7343b975ee72b028d6b5d3c798" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e686e2add6f5e2c924e283fff0a9dc44" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "72fe01979968644fbf0548651f43120b" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ef1ef431e69852b40353b3eb417a7f40" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c417acb7aecc7942a252aef57ac63116" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "37c4946034dfbb1da6f6479dc0994f57" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "33b918d7f77d8b18f2d272ef393760b8" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "5391fdc8cbdc0326eee6e8c0d37ff597" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7717d9dc57034e498d858ffd9a0b1528" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ad9088f061b7eb1f6ad4fd1be22125d1" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b65dd7e72758407035c55980c8c825f6" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c5483f503d27397c76a35c3ffb9dfaca" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "81ea8e3e665c6725635cc0bd6bc557c4" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8e532569723405ed01bf267cd1f610f1" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d93652468b35716f9d11ccd5abcf635c" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6582b95c40208c5443aed8b98efe1871" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "5956eefbdd09293277c6014adddc6053" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2b8720d94988441028da10985b695ef2" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "bf0e454464632343ccc45db1954335fe" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "16a1df079cb77d9e77e6aa73b943885d" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0d6c384bcb21e463671b613b5fcbdb15" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1826c9bfb84a2a451c4a140936d1334b" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4c6b7b886f8e9767d612a2de3f42b3f4" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6c43201d36347dee7325666ab4da1ca2" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "96e074aea679477e15ad4ba658859385" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c4d532c6507b493ee1024c67d25e1211" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b05043f5dcd2a1af816b6acc8611d23e" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "85c5ef07c72e3524de611c294fb606b4" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1723bfbb3476024d0706df599747273c" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1e95c5decb3038716a6c6f31bc5ebeb6" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "81859bdf8c63b3494d988c44356021fe" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b2bf1374781a9ef97db037171b3756e" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "41d26dd308715e2c9a0abcc212aeafed" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ebfa00d6ff7106925a2739d210fb3901" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ffd877bc37a4206fc3ae95fa501fda8b" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6d57a843823cd3efd37fde1deceb95aa" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5703c1455937e3970da59678946d84da" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6ac010c672c0f2d63ee8c5045a23781e" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25477120 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26787840 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27836416 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "18b0ead3016bb695ef411fda12330223" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bc3d10f2ad79ec95a218bf5f23840855" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "232344677a8e2d5106f74fd9fd956050" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f77c267540d08ca3d79ef3c47b67da12" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5738d2559d38d5ceb41ecefa7136697c" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "de8f23305eb94fdb146e701ef5d15554" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ccc89bf61bd0133ea6f88669f8b7f577" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f809c4004294f738cdc1e188b8736b2c" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9437ff152606540c8cd6759bd19cc874" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "75a3d0d296101b4911fc06db743b7caf" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9a064823c951232cf901bd7c6dde53a1" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2ca51e9db1fa1858952c80d387df33a8" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9ad6fb5d0c7eb6cf9997c9cf11984ef6" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8b1816538862d71cdeec08394a79ae1c" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7476846f4504aa3b7dae23667974fadb" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ea0c2e0d53d6f114466e209d9c19d7cb" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aca197c9f851f8e5e47762459da959e1" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "79a82d6370198de1f08864887d70f226" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "431f2d721781b0ac26977c63ef4c53e8" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b494ef6c9a44e22d81863a4caa06ca2d" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "b44b1d2631b6444fe749ef4fd4397896" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b5526d1439464605f510ce3598b39306" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "08eaad09b4359c2c4d4df6842f394f53" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "50ea7d899787c8e5020f831f948def82" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "cc16087f1ee8c24d38f84041e46498e6" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "84ed75fe667351799e4f1c028e2ccbb4" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6bcfaa7cc1939efa64ccaa78b372525" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a803033c73fc7f722930dd2f08cb5a4e" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ecc634fec68f0b00851440e5c006d589" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "6bdd72c5a969adeb1f0bcae184c71628" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a07d1615dfa38ae8b29691fdf1f8bcc6" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f32cdabfa2712faf443b7ea9bcb459bd" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a0b3234b20d495cd1e333289960566fa" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9f303035848c25c8adbbd3bf45929f20" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 12075008, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 } ], "md5sum": "da319a8acbd1182f319b895a01eb7277" } ] }