diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4319 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3631664128.0, + "BitsPerParam": 3.617978559693305 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 211365888, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128256, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 211365888, + "byteOffset": 0 + } + ], + "md5sum": "568f86ab316027f797575a37c50a79d9" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "1578a3fbfd9c0a05504e325dea920e72" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 211365888, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 211365888, + "byteOffset": 0 + } + ], + "md5sum": "23ee1db64fe9ec1de25677c5d5242b84" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 26420736, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 26420736, + "byteOffset": 0 + } + ], + "md5sum": "da547dd4e70f656eb8f40d6d8956af59" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "4c0063977321d384394a5f99388b0ea0" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "18b2616dcbd2d615ed4fdeb36273e640" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 32335360, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128256, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 26420736, + "byteOffset": 0 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26420736 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 26428928 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29369856 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29378048 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29386240 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 29394432 + } + ], + "md5sum": "04db2318900e8b3a476a5f39405ca604" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "bfabb7a218ce24bacacb141ae1266bc6" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3053dd5dc0f8fe2a3126490d692373b0" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "6537d6f37aeca14588943e407c1d7e33" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "a813b9b9d3f218e50f41107798ecba8b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "d986c5b85af192cc860c86d3c47c6a8e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "91da888975c27df5030f4be6c0f0a3ac" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "b1c211f9d5344cecf879f59f07b26d75" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "c23f74844583031d99a423be9becbb2f" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "2fe50ab1a77436f6fca52ad29bc00d97" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "3414b7fd721798e55838356b70b9aa24" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "ca7e39eec6adf08608f517909c05505e" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "ee09f06f66cf059ecda5a1c940aa9199" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "5637334db71603abf174799906e0f087" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "8b7b8e43ec0c1f677d796ecd79d0a63e" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "3bc76f625060c4396dd9246a8018e10a" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "1821dfa1e5488796db2f5f92aaa3973e" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "4e7525c79ff779864a1c07397dcbe907" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "39dd18f3e7f31524498d2362ae0d88e7" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "54e49b38a4942349f998354fae2f3bb5" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "c168d4c71bc53600e7831fbd0893c2b1" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "c2c9338db55bc12b4939de7116619a23" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "280ddf5d13ea211c1a578d00d3f75c53" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "7a7eeca7e67cdec2172a37e998219112" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "b95dfc8ab046c98727b0ad947169d485" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "29bfd601b95e1b1a5a737f19d734d2bb" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "a8e34d73ab78e8cbac2852b97c969726" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "c678de2417509d06f72eaa765812cccd" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "fe1fccf462970016d5223f96093e443b" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "cf140ea96d465ba892a689c9e6dcc0ac" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "a6ba7ac350d40a571ded4a7dab065266" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "850133d80dce2b5ef4585ab5c34a2257" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "e9e316c935f33dea658f4a68fd018f64" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "bacca3d21a214c3f62bf8c67c81a3959" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "1fab769c9c566e4daac107cf9e926e44" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "cf55f203633848e2f0227f0efe9b5322" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "faa950a2b1ffe74e988fffff5e7de265" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "8bc4e6994fe102d387098381d3e4abb7" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3ebd799be150fca9747f3c7884b72c0f" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "27cd361e5e78a1eec5631aa776ca53ac" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "2f17e1d4222bc38f9ecb9de2d6bf321e" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "29c3171cdcb9342d608b5254693449c4" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "99df10eabb8504bc28387c07eaf933c4" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "f8426472418218e5a52c7276f69fd3e9" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "af71671e925b52708f85588c9cb91993" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "3a1f25f50e9e7b2bf9c652b1713bbd1c" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "87dd9c259feeca2057c5bca3182f1224" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "0e6fa563aa1cf5fc5d804aaaca0cd162" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "b8bb9e5290b756380dc7a8d2796a92c4" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "19dfd70ccff2a502b9fba99607a187ef" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "899a24caced2a7ef73c3b01a319ce22b" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "20141f59bae4a9454011b04858dcd8ef" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "fc02c7aee4e2286cdf58ececeeb84bae" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "3d43c94cb4e69ab4c9513f4624a22eb5" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24899584 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 24907776 + } + ], + "md5sum": "41c14bb0350a9406d97e07ef75fcd37d" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "fc1c00e96fdca8dbc3536a7269ce8ad2" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 30806016, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 0 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5906432 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 5914624 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 16039936 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 17305600 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 24055808 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 24899584 + } + ], + "md5sum": "b6215c810f0ad7e1a7d37ba4d46342eb" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "0975dd0e8dd558b2d578621207e8a40a" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "bcc8c97d89236ef2e8056117abcf6dbb" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "f4cd00ba0d131dc4dd6bc0c354136712" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "b286102612cc6d104c2cd653b06394f4" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "7fffc4e0883fa122e43934046075f6d0" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "2ca8ce368c93c8b8a2196bf8d9f8ca13" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 30806016, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21934080 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21942272 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 21950464 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 24891392 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30797824 + } + ], + "md5sum": "901900f33d2b716f6da867b681f6ef12" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "fa637f185d1250d2bee1b52c8d6576f2" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "8ce369be27e9a759e33e60e37cb4c4dc" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "540d6d87107681a6356d91a22a19756f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "d9e3ae1a0d8d3c0c48590b9ed77f7e66" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "a1952eafe799252fa754b8de7856ece0" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "8bd4963eac09df3c14493242feb2f935" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "d72c5a6a85b7adab49f826f8f97c0a1c" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "ea08c96db25455b5d94c4d54d1e79a5f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "14e1017109561b103b8b18ac902a2960" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "3acb8f5105f9e5a6ffd4c7226dd46d16" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "8d9e4f27e1c11b356d66a777b35bd5bc" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "8499034c4f4e1cb983ed461b7f6886fd" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "543c76c9d5399d417d52d58f45148137" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "2589e5f1dafdd36e00ac24da23d6d6f2" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "828098680e18438cdc47503c0c33edea" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "4647bb6a22b64d6935e89475fa6f3265" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "44b8744d0903036e19b6c7748a13d86a" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "023b3bf98b5313473439ba65ffa73f99" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "3afe78671ba52b432101170a2965a0b0" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "cf69bac6256ddffd0bc04a3fb998f5d0" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "daf6dfbb769b92613f27ce82e61d3529" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "7be005221f09f1b305c88f28e4d88e08" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "349b13fc2a1d9356b0803e25520546a4" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "e187378944b9259dc003a8988a7a75cc" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 23527424, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1436 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 23527424, + "byteOffset": 0 + } + ], + "md5sum": "3b804276b2053f99fc01ba0da6b7e167" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "dddcccdd9f2eeaa43e0c39408e5074d3" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 27848704, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18984960 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 359 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2940928, + "byteOffset": 18993152 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 21934080 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27840512 + } + ], + "md5sum": "f398b7ea94d000df8220d2395ceb4d73" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 47251456, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 47251456, + "byteOffset": 0 + } + ], + "md5sum": "091aa9f037c32414f9848f162bbdb276" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 24891392, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5906432, + "byteOffset": 18984960 + } + ], + "md5sum": "60ae2443d66d2a561a3425c0c270d987" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 18984960, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 10125312, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1265664, + "byteOffset": 10125312 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 412 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 6750208, + "byteOffset": 11390976 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 103 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 843776, + "byteOffset": 18141184 + } + ], + "md5sum": "0e985b0c456f0a98d34cc57f23d4b319" + } + ] +} \ No newline at end of file