diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3521 @@ +{ + "metadata": { + "ParamSize": 283, + "ParamBytes": 1656834048.0, + "BitsPerParam": 4.069857841273854 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 197001216, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128256, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 197001216, + "byteOffset": 0 + } + ], + "md5sum": "afed46c2626032adaa3a00e22797b92d" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "10c45988a8aa86a7a036bae52c98a8b8" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32913408, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128256, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6156288, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 6156288 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 6162432 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 18745344 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 19138560 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 19924992 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 19931136 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 27795456 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 28041216 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 32759808 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 32907264 + } + ], + "md5sum": "04a1dae5481c20346e8b8852527862ba" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0ef724e70788f44939bf0e73e96d7a90" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "edc8c85302ddcc0715c3ccf2e529901e" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ffc20dc34865aa146d73cdfbdaeafdbc" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "0be50caf0e03ca98071f1485c5f445f3" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e38ab2c47d1ece68bf58c897ef052082" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "de11c331209ac1da630fdd6b6ca41cc8" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c83d31bdc8a32c7cd685f7780884a0a0" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "e84bd5b63a224d068ff9cf2c31042bfe" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ce8b583d04d00eda044d90b3a3ae4157" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "475c4991b84ba39097fa26d5f7e8dd8f" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "51ed30cbc2bc3665cf7d2777323168f0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "5d1577c3281078ee2401f3284940aeab" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "4da070ae30d04ff2cc6efeeea4f2a540" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "bfbbfc67e15a76b14deb26f67255dca3" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "27471e3c4f1d553eae646f2c65a7f223" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "e7a63cefef49b1ad6c9db0ae7dfd7e66" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ed993fbe46efa519c0911b0053adac8c" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "bfb160ecfc8f59a8d2f5ccb546d5897d" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "922b38d3c9ce559240fc9d4d59a3411e" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "37b2d7524cb5eb0ac1d9238cf718b561" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "35cb949bdf1c9a85fcde41294fd9f8c2" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "f52390f3999d4ee4f3b60c59de1544ac" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "79c4ba798cfac5a5ecdb8f6395ae77a6" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5a8509548e2757e40347a7065e865f38" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 27531264, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 26744832 + } + ], + "md5sum": "d4e46251ed1306ff9b1864dee5510626" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7fafaf6c3605a3ac41525c2e663b47ac" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "5e76d2fb99aa337aed52debc83d55a0c" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0ca27b977c9e90f176b0f5951e9198ac" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "d5c8332e05928c74f3654e6229b4eb1e" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "69b0e2c66f0be00e4c2b513348ccc58d" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "d912af8e1795dd5e1aa69697fef3f87f" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "de6b938ecf7a59bdba88578a141a43ab" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "f8b63c2bb4c3753f760931b6894be087" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1220d854fc6dfdb610c222a9832928a7" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "7e60562f90b4c954bfbc1eda5c3c6164" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d1b3ccbdc1e2b827f67f370aae514efa" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "6bc668813dfa99b089384564982008a3" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e4ede658990bba83e16121f59ea75385" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 25958400 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "a30c76affb75d2beea28dd974eac7d42" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25970688, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 7864320 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 8110080 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 12828672 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 12976128 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 12982272 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 25565184 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 25958400 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 25964544 + } + ], + "md5sum": "95497ed4987111a354f8441446760ded" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "eef8d62c11f85246e6c16c3ad98ba015" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "b07e782600d5c42b88115cc7cb356e5b" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "34e23dd3d744583a2ed793dc93fae6b3" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "e626b4823732ad6f2811a4e4c209eb07" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c685f9d558f4e66c73cd1752f0e36a81" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "71947e86bdb91d4c402ee303cf2d139e" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f457319f2146339e214e22efe1bf9f96" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "9cc5fe8bbb4e70033141d7b272dca809" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "135eea4ec06a28d3350cc9df771e1af8" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "afa274e391722047ffba2ac85d89c3c6" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ef971876b32ee79bb0fe98778349c336" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "62b6eabb979a0ef12082812b36a3a606" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 16384, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bdb58dcdaaa1f2d4c96299b699b25a06" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 26750976, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3072, + 1024 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 3072, + 64 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 393216, + "byteOffset": 12582912 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 16384, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 786432, + "byteOffset": 12976128 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 13762560 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 5120, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 7864320, + "byteOffset": 13768704 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 5120, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 245760, + "byteOffset": 21633024 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 3072, + 384 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4718592, + "byteOffset": 21878784 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 3072, + 24 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 147456, + "byteOffset": 26597376 + }, + { + "name": "model.norm.weight", + "shape": [ + 3072 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 6144, + "byteOffset": 26744832 + } + ], + "md5sum": "586e4703a2e124eff5811c6e5c6c3258" + } + ] +} \ No newline at end of file