{ "metadata": { "ParamSize": 805, "ParamBytes": 36381212672.0, "BitsPerParam": 3.4891900844756822 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 1024, 128256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "efc3106ddacb59cc63dedebceab79b3b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "36049a5c7c6cefb1890416aa9e08385d" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e2c95524ec7fcc8657baebdabb16fc88" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ae651a6c8f0170ad62323b7794eb7b81" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "90d218e0c443262dd5899ddd12dafe7e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31145984, "records": [ { "name": "lm_head.q_scale", "shape": [ 64, 128256 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16416768 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 16433152 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 20103168 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27443200 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27459584 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27475968 } ], "md5sum": "96bc41d7a0f40cc2987b4f49ddd6e941" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8139517cf3bbe9b99961bf8a585d889e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "af95b0c53c98996086b6df8b8c4533fb" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "a47c1e9f873f5e3ec3f2bb7b1ace4480" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "461ac9c58457e98884edda3bf83e9d90" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b31e633c75c09ddadc7910da8dd95373" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29835264, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16416768, "byteOffset": 9732096 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26148864 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26165248 } ], "md5sum": "31d96a4e56ff7ed5c26126a488abd8da" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b6e2eead7d76b8bb66ebf605342bc43b" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "19d87243166e2d694c47d2f6ee154c45" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b4134042e8e2271ee1906ef46e7a5646" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e28664c00b17dcd0437a2044a169c66e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6800f53bc1ecc054830ab186cf1d89e" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a6ce78cb8c529187983b8e2cb2f30160" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "32cee9c4a1e8fdb07dc32820a70abd2a" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "21fbe364952260b0b89c278b58a6d836" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "08d8b9d344609acb81e0bb7280260738" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "cf0d93a7c414976603afd7bb7baf6e89" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "45567238ae6903cecbda07a628856d59" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e2c271d7c74952c1041ef4525e9f669d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bd5b6463cb8619b96f7eeff7795e9fec" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2d67cf98718cc6b39a023d9585def1dc" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cbc1d58aabffdee7fc1e42d7163d09be" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f5d90bc4ef942e770c5aec49b47d29ed" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa3c46830562a8199e81b30cd9032fd6" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "836c4b529b95c129b9888e8caa79bdd6" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "99591644fcbb2bf53edcf4698e756627" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "4c80986ec0b7b5f7c5a0c3bd23328314" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "28d9791c199ae6e005afa0cc563e4aa7" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9e551119f6f373b6bedcc28cd54ceb04" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0a75db23bd30229a6a684e50782a2b58" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "798715f2f16c011b54b39c452dd129d1" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "69f00bba71eaed7441f0572e68f34c69" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fabd20af68d2087c3e716c160dbec81a" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "439808e35a944fe97369792e5fd5d65f" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f681bb8659f4e2eef60152eac735eba2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0ea12c1e1b13c9f3df3212ea231d73ba" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe33ab153d3db0709a8814026bf2d83c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 23117824 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 30457856 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 31768576 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32817152 } ], "md5sum": "aaa76d644b181852ee5cc6a0341878dc" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2545b3f3a391c458d2dbdc238a350b88" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d399efe44d19ba351ed5b217925c6e37" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b681b7a89c6ca9cf0e0672911b3049d8" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dd314dc1a8b0d1468f810bed3ca2e690" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2e72be21a183008d1116a7ea5b6b30e3" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bfea0f57ec8fbb99b3676895a957e34e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9c23e17bb3165d8077010974037dbba4" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 28147712, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11042816 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 11059200 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 11075584 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 14745600 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 22085632 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 22102016 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 23412736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24461312 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24477696 } ], "md5sum": "a3b89179e009b322247936852890aa66" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "350d83bf184738aa93c9745d07440cc7" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7a08af237516e145aa6f649cc6826329" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ee84e5f58dd9660d202ec2892dbc764b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d9d337af28898c95fdc5608bd6003a76" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "291fe2df89cd469e3c9d748b3792d396" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c8caaf999299ca029fb625e73ff91626" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fbe13edfaf9949268b65fa5eeb1764c8" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "53ccd14f1f460e65524b69a52a087a57" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "fc1569504d80317866b4b785ecf4d7ad" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c2e8f889170d3d5a45c0f72945f0a604" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4870277932d777eeb1f668174a743162" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "133b8288227685b47d1cc42c92df8932" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3017c1eb7cd1c676fbb2eb306f979fef" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b0af2f8cc017943f6e492cf84805af5c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "88c3278654da4b69817be0d12df77a62" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9779f3162a3c7936b36aa35daa0bcfdc" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "93ec42ab2d4ab9ab775ab295e4aa4d34" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee8d1bb7d44882391b16c0436ac49eac" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "734e40d97103ae36bda3427ea6b4af06" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "645883bbd98fab81fd01bf3de4274b86" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "78d759d90eef5c6a6f9eccf5405a9827" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d4dfd20e81aef9ed1794d09ec3532192" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "267271c837560cb674e914b47967b80f" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "17d7996b897d512b5d23be981d2c2290" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "896e0fb9db3cef036b15d0e6310ba4d7" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c33a11d70718b20a4bc2c8e39e717309" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "539214b47500266d857f8bf16318c5fe" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "1751b659ea431a0081fff8d9a9800f3e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d990acc7f22a15a0838e93ac105e7ffc" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "04a8c42ecb1fd8d1b0f77ca2149334bc" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2b546ad73f9ac5395f2b3fbac6562f99" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7102f87e8cde0d7eb99061982ea12b69" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6b76f9c63bc0a60332858e79b90ed2f9" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0e77d557c427dd10b080c3a5778e715a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2c03d1a743c17d0c9b397d02d88c3f34" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1deb4a0aa97bb0fbb093a68e6bdc34a0" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "74aca53a45095a4e565574c0f65dec40" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6f723020471d1d681831d68dae8b6dca" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "5222964191aeaf670197ebb03a7499ac" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0b33806c72531ea1a2192450eaa3bf93" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0ac773ba6b00a672ac4dd2c1159c65ae" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c1aa9ad5e3634cb53371335a77818184" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9d2e386859f949e60753e56870f1ba2e" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8508563afa0aae2f4f1cd0895e7a23e9" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e45daddc3d3f6bc2a2f8f4728922ba07" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a5a9ace61b1406be7f5ef6f18cc9659e" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "90d23776c4597e826cc411fd2f2b9e15" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8621ad9ec02cdc7667d466c25dbed8d7" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "858fbc7d54b4f6bc263e04003f2ed89e" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3aef0533702e22845972749ea4f22901" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "982736dffd318c9243d2e561ba7534fb" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "661a4ac90d6361f9b0c2e6535d754a0d" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d2bef4d1e0be257af5ef4d497c64e15b" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "00be6d19e977b0c27d0bc3a816dcae5e" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0500f99f77ffd5ec04c9bb1881c00d22" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cb20e931c736dd66029cf9d200f24345" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "17911467586b65ba46de01a520ead80d" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "44939e754c8fd729c3004338de3d4739" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f2d02f48da81ed1f22858a7705d59e0" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7053ecbdd0de845cee5a81ca57b69819" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3d1ba282ad7c39f5212643609c2f0f0b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f8ba1467cc6b4838d6fb64d87f9133b3" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "88b51a1d9d127930d46aeaa4adf6d376" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cb6c0adc8940c3fdc638677752777039" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a957d773db9752f312f2ade0f3a4b4dd" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8881cc355dad078b799edddc801f2620" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "952b788d1e99995b989e3c0a2bc304c9" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "8adbc3a85aa622934d8cab5ed005b1ef" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c161f608a782ec2637acb1ee002ff676" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "61ce6ce1e9365b2732ae84d17363ffe1" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bbe4b53fc10501cce81ad52aa1b0351b" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8d364aba09f983a3d4d64075f46e5015" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "54a589d34700017a255efb2d30eb2e49" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a5ee24cbb3ff48d55f72b28f10497fce" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2d918e3e2bc8c63174c81b1ece723eef" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "6ec78b5ddb076910f3e8c132754133fa" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 32817152, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 25477120 } ], "md5sum": "2053818fb6808d4e5d58acbd5c06502b" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b7c8a6036f97e7dd997fbc70cea65519" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3e29bd0cb4d1dbc94bd2c61e1dc1d904" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b9a31d86d660a4c08c055af3fa7b2955" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7ea20d38da52a73dbad6972fd5fc4bc1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "29650534d7226d5ac103e3872ab52176" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "bb579cd85f9bd2c14bea4727b6f997bf" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6e09f1c77a207795b4966d1b5f1af04d" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d125e8c6f9fe236ae25cc2c8e3dcb443" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "66de76ba11aaf9d18cc18289ea26fed0" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8a5e6282872d6109831d1e77936358bf" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29196288, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 1310720 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2375680 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392064 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2408448 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6078464 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13418496 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 13434880 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 14745600 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 15794176 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 15810560 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 19480576 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26820608 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 26836992 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 28147712 } ], "md5sum": "055a76a213994970bd70ead858de22fd" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b5de4d25132488889ea7c9b856c3a30a" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e4b26c8ed60b1458bc337d6949daf948" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "3eefa8179e70bbf813d9eeec8f5e4407" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "da067879fdf9ddece47a774680f7d85d" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "140e3d6b8902f7612154c34210e8bcdd" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "59a8520523443a4ad75feb257aced266" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "139d024e10141bd2210892113a10398c" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "045322eb6775ef93862d81b552dccf3e" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "191190243f6028daa2529a57d6afaf43" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 30490624, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9715712 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13402112 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 13418496 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 17088512 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 24444928 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 25755648 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 26804224 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 26820608 } ], "md5sum": "9b90d109c8d332bba180929da648f01c" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "775d7ab3b44b4660d554235f865211d0" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a1bcfae5d73bf9052551f9218df2995c" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3dc56748598279ad43faf2596fd4f55e" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9ec01ced515a72eb8b18319bddbcf8a0" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "105ea8484ef46615a1f2ad5671aee963" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c7f998a4c849057534eafe83c886020f" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cb927c69287f074ccd4b15f1eb598545" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "03beda518b8f8e787cdd3753c109bb35" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 12075008 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 12091392 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 15761408 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "07d5654cb3b6f884d563731f7b39714b" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5ce36c7b503a92b2c75cdb4e24bab552" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4c4631e0be0317b159944cb805c5ea18" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "49e9b7acd07d85afb8961172b8972ae3" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8b8d625216eba19efd03f56308af8a51" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "39a3fb5744a374c1da57764864427fe6" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "260d10a132eed301d1c4268226dd4ba0" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2fb2656a8b828e877c4936b7823801be" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6890ba346d0f4b2f360b53c6291cf119" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f350e165acab2b5cd314c34d3c022593" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 28114944, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24428544 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 24444928 } ], "md5sum": "39899864648fa6b55b919f49d170ee9d" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9a134ff3a4ae528ca63884d235566d23" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1a74ee3639d6a21cb0661de52d402a10" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1eb4da44ac142f85140f801a965578af" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6da0db8ea70a7108905e2df938ae42b3" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "76f44ff3aa78486c231ae85ed67d7fbc" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4fe75e81a2bc3165af1cb128095b01d8" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e9a563729ac3ae402d34e3fdae940716" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9b24ded1d472f16553fca72afc161148" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "ff9e8cca767602743e4167feb03e3998" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d9cc2fadae62712bdfd4f482ad938645" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e6edf8c9f12dc4c0fff2ceb1622da0fd" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "45b0c0b9d26eb3585d8f5b65aa6832bd" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4cce8bccba5b9f7c5d04ee3ed232d476" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6904c1c7c44c2a54c92c55335699de26" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "27222ee4384b8e7d31e503dd43e8d1ea" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "31bde54253e3f8c5854321d8897ad7d9" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a2a60b521290a1f02bb570dca08567ee" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "dea771089863a05c4a46af2b6a689827" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7c928581b9b3118a79f472b2814f7d42" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "e6c495ae606e5d7aa5d8a00462748a8f" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5fce175298a7dec1c3a9bf999e07df1c" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c0a714c1a46dc0b4262a9039269433a1" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "59b5a3c2ad2e23db7f2948917bb6618b" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1b233fbb96205c8be78d038889a354a3" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "08988bc1663ad792db7c5831a928cc09" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b9c3e45c5fb6f772e657bab08513740c" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "246b28a05658c402b3e5ff9d08c71e85" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9375ac6066011d6649bf828856831b34" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "d4c15af9cb3177e5cea00074758ca5db" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1ea1b55fc7a01acb8b4a600aac24fbf9" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ecd6241392bb7009d263d4b78d088e08" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1daa2f01eeb21c8e65d33d6b6cf946b7" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9330d33b7991465fba6d46905bba623b" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3a772243cb7817c0d3e2c1d5d80905f4" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "007feb0db047a84a7edd00275f26ca80" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "29afe2ad4d7769089841d5256c06551d" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "74af429196f83ca7caa7c492bb426201" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "31cef1a795c8c756208c7153cd8456ed" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2062c58cc650ef920da280408691046d" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "12ea5f469b0b89659f23856d1e0c3a4d" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "349294d356fadbedc5436cf0633f52c3" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f0461ae086bee34eaaf7cbd6724d9d20" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "10dbfeb68032b4cca3d8e0f27363a410" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d45337a2332d3f018002bfefb755639" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "996f4931cdb25968db396f5126cc53f0" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "52a9b502893de2e417f23dac22d2ba46" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f69e38251381ec00bf8bf42a3db899f2" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe4179ea221aa6e8d55004f1e0d6918e" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "eb1f0c7ae0fb86402704f242db7e928a" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7a1dfb20c61789921e4956dec789be62" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a727365ed30ab3228e335fec05539e04" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62c2cec8b5f66265c16371a8e6c4a12b" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ccd1194083edc0191f4b15c636a4218f" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7bac4a6b9fa646a349be72e41efe0fe7" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4a7e478463033f40abc5e9ab1b862f9f" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "25187d25b282bfaaa26ba841000e1137" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "463ccfddea3ac30b0a9dd6aa4a605918" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "eedaa57f27c1fe7ce8c41bd6f38a4c6d" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3b02858733bfaf3fd0c86f4b9b9f894a" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f75d22bae618374049902b29096dfcef" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b8b83f1d17e8aa71ba3fecf799d68ce0" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8261528683ee1a808c7c7e68d26349b4" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "61af7bc049a8a06c17c1184ca7888190" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "03e3f84db466673144e1491389794a08" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "859ffcf07fb117353a3a23638663da9f" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f3e00bc21f9e459b03d5a0f1ef7cb2a4" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b7b0b7e0da90027415b73d63eff87fff" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f2ac4d9e1abd09cfce1a98ca93775241" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "6ef21c46f38e29116f074d0878b61c72" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8f0a79f46e54e7cd94fb78ce43dd0109" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d8118f073496a31d57e36c27c4682899" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e66b04c0ac2ad29cf7f4eb19da690381" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed2963c2d79d9771a7a011bba90f9511" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "846c88e8491cf9c7cf3aebf9a2a22870" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f94e688deb575e80341b2e7ff05ecef2" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "695e771878697b249e7c4b1480fedd91" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "64d6f9378340511ceb761132b9da9059" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "9f774c3119f998b209827aa099f40e5d" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a0c4d1ce9caf17ec4515473c2ec360f0" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8cac8cfe2c3b7378f6597a8a7042f666" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0d4ed75460456dc8ae83829513b0ed69" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "06908a9b54118e5d1454499a82d7b340" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1e5a65a54d3938d25b1d37c6a53deb5a" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "db5678b117a1498f67d9295802704048" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b9c50f8bdb8f8968b60a4365904cd1cc" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "592f12707a3dea80682bfcfb824d6acc" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "45f7133a2dfb75beaec944478d28dc62" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9c3210f06f10fd819868b4e84ca13684" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2c5b856a0ab4b11953d8b969b9c2c1d8" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0018763a10362d3734242db70c2e890c" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f9e4079c488592340e02341cdacb0d92" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f1e2d50cefa5d0a0b416b2fd81185bd4" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "40bdac611071c9ff517a7d79a67756f2" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "aa529cb97dfb8a61bdb7cb12aaa4388d" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d614be14c2791c006843525f577ff58b" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d0f0eb07fbff3413f5128980ee871aa7" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73a6916d3715482b8b97de9b063a6416" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "bdafe44cd23d7825145b12e78fc1ee41" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d65876c8a3f273d2a4513aa4e3d556a6" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "73fe1f9f7200ac2bdca55c24b283676c" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e9f85fca25ae39c59a17c3feac3d0813" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a4602dd43d6e2ec062ba3ce1f856147c" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "db9e4b190b5f779290039f75be96bda1" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ce8c6fc24c9f885854c35a7125f189e5" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "00ad826ca98127b8fbe0807634cf0578" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "519363f75f273b5efb8f963a708a9232" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "34a88b15e88b088696af6f12332ffe2c" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "918ab83f025cdc5ccfb50587ffea4a27" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "69eb8ef191e719f06049eb32d19b4311" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fb47ac4c681209a8b7014bd308254338" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a5bca37f24c4dda6cd2be32c8dd027bd" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c862ffda6b5e6d82da89d65cd6142a79" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4cc92a9f9cef65286646a3d723099d34" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "68c39066639bedf8bc0bbea0a1136bc2" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e15594a016677767c69fe2369ed8e396" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "880e475217f6e49cf3d7bcfe0e6b4582" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a0c3e0bba4d40dec263a336501cd71c5" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "b9980c53cea795991eba383378d66e46" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d1d8e94c4403ce45bf81e73479409911" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3b16b61327c15c58ddaaee4ac2001019" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0fb73a9fda457404af8dba51a6ca7a0a" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ac1d936a831f5601a713174fcdaf3515" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dea3c1a625b9e661a0920b68c5e42cbb" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "442bc2bc0c3f4b5bd2c530895b642616" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8dde10e00c10164e3a59961673c3a868" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e4d4a39ca00662d7c91fc5f0ccd699c4" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "acf89a5b183e4723e9a934da89f62a65" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5b1d9ea93e6b49a56c1c6c408fd07e2a" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ccbe1f8a1b252eae2e5b21c41ac04d5c" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2fdec3525a02964691df6cff1d5173d0" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7acb4fc2b78135afe0957929f9356948" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "871249e564dbb0f60d3d48846d4e01dd" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d3a801feb9eb9939de583c37ced9b3d0" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "074cc27f5a2fe5a0437e9787fe4ca622" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d9a5f76d6b3c852270e58f259a0d766d" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 26804224, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 9715712 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 17055744 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 18366464 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19415040 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 19431424 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23101440 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23117824 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23134208 } ], "md5sum": "be4f1e4c71b00b708c64501f0ae9dd48" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "baa19ea2ebf67b7f7d11947877320913" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6d13ec1496cb10ccd339553058dc3450" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ecd81f4c3c9ddf3b7f52a5d618071fb6" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f011ee52510087c16f05c7778bc6be0b" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b8d6f98e92a43245a0d43f021538a8b7" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8b5f29c5080f52b5fdc827f82f22d0a2" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2cd6cf98cd4882e0350b50c6ef958d28" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8d51ed6f08f80467db22c0ad200f6b8e" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "203be18c43b17a08663c544eedc5a82f" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d51fc7dabe1da499343af73240f57f7c" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 29163520, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9732096 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13402112 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20742144 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20758528 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22069248 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 23117824 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 24428544 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 25493504 } ], "md5sum": "00b98cc7390078bc3953531ded67740f" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0faa8e355d7aad1e725d3c8a1e610200" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "227eb2ab665640bcdeee8bf1a96c6764" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "15e29ee2b2c2ca39c299c749aca7a46b" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "01bc0389e6549202a2fa900409b3d538" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce200879db95cc06f0007f2608b23bc0" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "711f414360b7815bc70d2778c3abd819" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "0674efc61284abe961792742e33d1aa3" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f59ea95303e06f638a2d1976c1c2a9f3" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cf0a3f0c5fa44e43b9d9217aeae4e4d2" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0864b41fde4185a823d558e4ef73da79" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 7356416 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 8404992 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 8421376 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 12091392 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 19431424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 19447808 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 20758528 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 21807104 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25477120 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26787840 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 27836416 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "7f2446c764714d41628eb9d3ef07e13f" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "09b16e9e6463733681af5b7699efaac1" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a31e8bd9d8a84ba55bf23b882ab23a8c" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5d0b74d50d55bfb3c85b95c9d3fea2a5" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aa7a4c2e6074a0c0dfea5742128eb91e" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7bf5f50704dc767d857cd2b0c1ea6f6d" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e60f8d042602326ecb90614c8800d34e" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e4ec870b73ed63b7fdb2f8cc7472131b" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "480861a5f962ef003768b1043fae1e16" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 33112064, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7356416 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 7372800 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 11042816 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18382848 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 18399232 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 19709952 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 20774912 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 24444928 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 31784960 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 31801344 } ], "md5sum": "a836a59b454129c4a614d510f2f11a41" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1b4a4e618ca30d6100eca83c7d920301" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b4ae007f59204108781762ec69387b99" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4c7afac2f699fd06eba1152541e3f086" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "30c539f997d3b56ded52ee52cf0076f8" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "be93926002f841b607cace0c96a3f7e9" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ae553f1b9b98a3a79871f403cc083e66" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1dc981b4aa615a76255e209f9e395223" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1691f6fe26ccbf482710d3f55848d6fb" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1981dd900f7967c9155914cd2d24f0cf" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1c4ed9e740abbcc099561340f44f30d1" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 31522816, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 1048576 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359296 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 2375680 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 6045696 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 13385728 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 13402112 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14450688 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 14467072 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 18137088 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 25477120 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 25493504 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 26804224 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 27852800 } ], "md5sum": "70e40e0c0200ae648b0416a48e9176b9" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f4980656694749f9de275374b5e39318" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4faa252f41e91c656f23891d58fbd0bc" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d4ff4cee793bbb7656ef11bc21183cfb" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4c4a6d09ef85836b28bd3687abcaaf41" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe86a92feafed3660406c675b369b536" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b13b5628a4b800adfbf2e27e7d4b1f72" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 3584, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4862b3223b06b696a7ac019c54b2055e" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 1024, 57344 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6fdeabe3faeea7b35ef7b208f97779b5" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 26820608, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7340032 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8650752 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9699328 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9715712 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 9732096 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 9748480 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 13418496 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 20758528 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 20774912 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 22085632 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 23134208 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 224, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3670016, "byteOffset": 23150592 } ], "md5sum": "5dcbaed43f26942b7bbc139c3d33103a" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "074aac6482218ce02904d800a072c0c3" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b15764f63a84b4ffc62994ab64ad0b65" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 1024, 10240 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "14928c655c48b3db3de82748562841cc" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 1024, 8192 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fa2a8bde080a3a6ea3caf94b80c39746" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 12075008, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 64, 57344 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7340032, "byteOffset": 0 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 7340032 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 7356416 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 8667136 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 64, 10240 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 9715712 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 64, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1048576, "byteOffset": 11026432 } ], "md5sum": "24d342f4bb45c2dc92a14a972228fb69" } ] }