{ "metadata": { "ParamSize": 283, "ParamBytes": 1656834048.0, "BitsPerParam": 4.069857841273854 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 197001216, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 197001216, "byteOffset": 0 } ], "md5sum": "afed46c2626032adaa3a00e22797b92d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "10c45988a8aa86a7a036bae52c98a8b8" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 32913408, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6156288, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 6156288 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 6162432 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 18745344 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 19138560 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 19924992 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 19931136 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 27795456 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 28041216 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 32759808 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 32907264 } ], "md5sum": "04a1dae5481c20346e8b8852527862ba" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0ef724e70788f44939bf0e73e96d7a90" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "edc8c85302ddcc0715c3ccf2e529901e" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ffc20dc34865aa146d73cdfbdaeafdbc" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "0be50caf0e03ca98071f1485c5f445f3" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e38ab2c47d1ece68bf58c897ef052082" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "de11c331209ac1da630fdd6b6ca41cc8" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c83d31bdc8a32c7cd685f7780884a0a0" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "e84bd5b63a224d068ff9cf2c31042bfe" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ce8b583d04d00eda044d90b3a3ae4157" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "475c4991b84ba39097fa26d5f7e8dd8f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "51ed30cbc2bc3665cf7d2777323168f0" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "5d1577c3281078ee2401f3284940aeab" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4da070ae30d04ff2cc6efeeea4f2a540" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "bfbbfc67e15a76b14deb26f67255dca3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "27471e3c4f1d553eae646f2c65a7f223" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "e7a63cefef49b1ad6c9db0ae7dfd7e66" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ed993fbe46efa519c0911b0053adac8c" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "bfb160ecfc8f59a8d2f5ccb546d5897d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "922b38d3c9ce559240fc9d4d59a3411e" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "37b2d7524cb5eb0ac1d9238cf718b561" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "35cb949bdf1c9a85fcde41294fd9f8c2" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "f52390f3999d4ee4f3b60c59de1544ac" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "79c4ba798cfac5a5ecdb8f6395ae77a6" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5a8509548e2757e40347a7065e865f38" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 27531264, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 26744832 } ], "md5sum": "d4e46251ed1306ff9b1864dee5510626" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7fafaf6c3605a3ac41525c2e663b47ac" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "5e76d2fb99aa337aed52debc83d55a0c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0ca27b977c9e90f176b0f5951e9198ac" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "d5c8332e05928c74f3654e6229b4eb1e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "69b0e2c66f0be00e4c2b513348ccc58d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "d912af8e1795dd5e1aa69697fef3f87f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "de6b938ecf7a59bdba88578a141a43ab" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "f8b63c2bb4c3753f760931b6894be087" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1220d854fc6dfdb610c222a9832928a7" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "7e60562f90b4c954bfbc1eda5c3c6164" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d1b3ccbdc1e2b827f67f370aae514efa" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "6bc668813dfa99b089384564982008a3" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e4ede658990bba83e16121f59ea75385" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 25958400 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "a30c76affb75d2beea28dd974eac7d42" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25970688, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 7864320 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 8110080 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 12828672 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12976128 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12982272 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 25565184 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25958400 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 25964544 } ], "md5sum": "95497ed4987111a354f8441446760ded" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "eef8d62c11f85246e6c16c3ad98ba015" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "b07e782600d5c42b88115cc7cb356e5b" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "34e23dd3d744583a2ed793dc93fae6b3" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "e626b4823732ad6f2811a4e4c209eb07" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c685f9d558f4e66c73cd1752f0e36a81" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "71947e86bdb91d4c402ee303cf2d139e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f457319f2146339e214e22efe1bf9f96" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "9cc5fe8bbb4e70033141d7b272dca809" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "135eea4ec06a28d3350cc9df771e1af8" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "afa274e391722047ffba2ac85d89c3c6" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ef971876b32ee79bb0fe98778349c336" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "62b6eabb979a0ef12082812b36a3a606" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 16384, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bdb58dcdaaa1f2d4c96299b699b25a06" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 26750976, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3072, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3072, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 16384, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 786432, "byteOffset": 12976128 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 13762560 }, { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 5120, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7864320, "byteOffset": 13768704 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 5120, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 245760, "byteOffset": 21633024 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3072, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21878784 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3072, 24 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 147456, "byteOffset": 26597376 }, { "name": "model.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26744832 } ], "md5sum": "586e4703a2e124eff5811c6e5c6c3258" } ] }