CodeLlama-7b-hf-q4f16_1-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
ab50f2a verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3790815232.0,
"BitsPerParam": 4.5004543650347415
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65568768,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65568768,
"byteOffset": 0
}
],
"md5sum": "92fdedddf022494211e083faacdd71e8"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30748672,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8196096,
"byteOffset": 0
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8196096
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 8204288
}
],
"md5sum": "216a98b13c2d5ee69780d990398a8125"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "04f37c3b98cd939c907567b1e3a6d9ec"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 28196864,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2818048
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2826240
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 2834432
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 25378816
}
],
"md5sum": "4c1bbe8128850c4bb0eb1fd9ffdad727"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30810112,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 0
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5636096
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 5644288
}
],
"md5sum": "9b294217b05a2518b263f9c32cec66ac"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e048235478e875c53e0c228f8ab50001"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f257fdb54fa52b704b06186b27305e5a"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5307871448b2ae395638922e72a52b0f"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "3fc8e600d6fa0daf41bb8cb9967fddf0"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ea6dbcaca9ee944f1b6a514ab30d88f3"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "230f7ce34ec8ecd2aa2156c3fdc67d38"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "ab63dc935ddc7eb12e9ba2af120ce37d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "789264c42cf0702c53f49f119710d8fe"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b8bb44bb788f7391432f49d00ecabd3e"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ce1870b922d1d46a155e0a27e8298ee3"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "6edab875238cb8f5c93b571f492750fa"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "01625ebb76de2ea16856529354078c2a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e8c467a62c1da4f977186a07702c02aa"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "8bd784ce88e102a68207e79ed9719eaf"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "e415a6e45565035bcef471140b711f98"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b9b010aee20b2567d1e95e7e567b9f10"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6a48d6758bb6f503989a08052148b1bc"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "348527553b77452d810940a523ea7086"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "05b4d4a0d60c5c7482e12806286154cd"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cef98d239b67d3ae2019fc2bc71d0bca"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "9aadfd5fe2bbc650bb23a50768e97c43"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "742b4b4e005fec4f7ffee43be1c4b8e3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f9cb360778d494ec5d3f90ffd4faa6fb"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3ff7f22e7dbc2cd0aba8753b49ea7357"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "4abe52293f273257a375dd2ad3f49760"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 65568768,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65568768,
"byteOffset": 0
}
],
"md5sum": "a45e34276343aaefe983617424444f59"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 31805440,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8196096,
"byteOffset": 1056768
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9252864
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9261056
}
],
"md5sum": "16ddf177e9ea05d339f3c1ee189c6d23"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fa16cd1afcb1c01d26e5d88f1c818997"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4a3752bcc0c5666b598d4a22c5a6fdd3"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2dba6b4c17f95ed4401bf4bf3280b22f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7c2c303adf36b0cc6e6a8cab8e145069"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "61018c259da283dcfc3ec965d2081511"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "de13126866779b5078d67c90547ecb03"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "1edf4606f0b047f3e422efa30a1bfd0f"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d908afd50fbda4d1355a4f7a76bd5238"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b364b6cbbc5e86ccf796635b6d608528"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "792e20a7ef33c5add7179448644c0d9c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "64a7cd015f1583450feee9f46622fe5c"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29827072,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 21045248
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 26681344
}
],
"md5sum": "d5bb7eb73187263a20e8a88d8f1b1a8a"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "039cd3764ba4ae2feff8db36d3abd784"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "206c8a1b3062e5ba98e44801b0304a30"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "af66be3a301f893d98b652e47a8aed32"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "f73b0bd39d9932c1981696b1fe1322a1"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "b362fd6a43044dc1da769840e17d713f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "491218baa2376003f6904b74cd8ca3d1"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "0eaec9a8df183aa7f67ffaa4700b2331"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "7c4ab3ef5026cc432a7a449c4fe6259a"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "255ece57ec28b954cd40318ab6bcd9fd"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "12a8f163d4386a31898f53223d681661"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "3991d049336e1108ad4e325a64128fa6"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "342bdd05e4c2856adf1e3689e9958273"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f801516435b71d0ae8374e05cb06653a"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "74a07a9594c92cc6de5daddb86bb31d8"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "d38ab195e55b152bef777873893f33ef"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "007c907347834a47fda695239cf3046e"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fbcf58f028201c2abdd4397f70622e87"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5a2615d032c4bd0f62b6fd0d193755d8"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "52b9af337e1e43d601b32573bd468d48"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bd311110d33e10f471a4151f42aa1c17"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "5ea40cdff1c9e4c911355c68fc1408cc"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "60c7740b29f3942cb7079c93277a6601"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "46731a2f2fcdf5cecf6fe8238b595c89"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "757d874aed1ec4f21170e046c41e57c5"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "cc38b3ae90046cac249ba5d02f06e535"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "5ce528e24350051b2e1ef17add184e57"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b87f556eec61303644d99413d428d59e"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 32661504,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 2818048
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8454144
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 8462336
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 11608064
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 19996672
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 21053440
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 23871488
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 29507584
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29515776
}
],
"md5sum": "21f9c4ee03c6d889c59755e8d6936340"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 31989760,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 9437184
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 9445376
}
],
"md5sum": "da45893b7f5f7dc7ef5d3d3dec8eb7c4"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "9c62af82cbba26ee5efd5fb914aafc7f"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 28196864,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 0
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2818048
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2826240
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 2834432
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 25378816
}
],
"md5sum": "e00185f8574fc52b921defe52a329b8b"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 30810112,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 0
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 5636096
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 5644288
}
],
"md5sum": "15c8b22ab21658226103203004ffa790"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "321dfcb3f96f30b4060013e3075f37d7"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "626c97545ea5a49754e124425aaae1ee"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "854f8cf1057c14d1e01064e777ab928d"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "96b87cd67e6b29fc8ae9f72cacdb7174"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "65f31d696b0ca7066864c9e17fed14f2"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "75848f61ea2863b64c37fbc8cc711e80"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "ee82ed5a2d4e75d623b6204a84e4863e"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "5fb7d01e10d853c74d49cedd4501cedb"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8abb27dc009ff903fa6cd996ba401a75"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "48a70cfd0e149cb6dc50cd1deadef2f1"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "4f4f36e2818526cf0842960f7d8ad75c"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "c78b7a2f861773e372d06b6fb42052c8"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cd2960571e4b149bbf9a5b716a323f05"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "703262cae06915b24105e90cd6a87372"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "03a0ed1bd150bc7bd967eb66df1edf13"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "57939c5ad6e6581e0993404cd5f64940"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b9246392573a5a51892ca9aef62a3a5d"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "333253248b7196678f3d9c1959b92a57"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "495a1f4b16a5edb095ff39beaf5f7ade"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1780e787ee8e0884de3ccfb417a34756"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "12cf05f21b913229bea57a0b11a6342d"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2e0b359804de7cce34fe8ec487fcef70"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "321e37ce303010122381ff7ba33d9489"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "38362abdc413be6f7e38509b780b7d00"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "2838d979ea742fa325904333f2b4ec53"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7fec62c8c8d566c40590601e67447067"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c9926b700072cc24ffc5b6269c292932"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "081017ec97a1753fe6747d3a6509f7fa"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "2b654eecd33c2f14e94affa57a407d4f"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "3fd651dfbc9bd572dc16562921c42749"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dcc3b12a15a17424414ec80e54dd903c"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 12582912
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 12591104
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 15409152
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "67617b2c99e55c67d1b88e4d62cf898b"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "aacb887623a903e667d406fbb232315f"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1064ef231cbb4efda1968f497767ab60"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 1048576
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1376
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 1056768
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
344
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 23601152
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 26419200
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "98bb069a3a926124ae44a3e597e4d405"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
22016,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "10b42fa84f54e1cc25165c4f47f0814f"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f5c8b9a27a461981cc677194f48a6570"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 30801920,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
22016,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 18219008
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 21364736
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 29753344
}
],
"md5sum": "51646dd36c8556bd18c472c4a46a5e93"
}
]
}