diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4705 @@ +{ + "metadata": { + "ParamSize": 390, + "ParamBytes": 1572915200.0, + "BitsPerParam": 4.501369085231279 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 64389120, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 320, + 50304 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 64389120, + "byteOffset": 0 + } + ], + "md5sum": "9c70e6dbc5f15d81ed7d01c4b739bc74" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 64389120, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 50304, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 64389120, + "byteOffset": 0 + } + ], + "md5sum": "3a6ee4dc739e9f93958b43601f7b5d3a" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "defd30aaf59f44326a43c7b5af530f68" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 28282880, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 80, + 50304 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8048640, + "byteOffset": 0 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 50304, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8048640, + "byteOffset": 8048640 + }, + { + "name": "model.layers.0.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 16097280 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 16102400 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 16107520 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 24954880 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 26060800 + }, + { + "name": "model.layers.0.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 28272640 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 28277760 + } + ], + "md5sum": "26582e588f6d64f81caef39fb09627ff" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "8756e595d4d02f7cb4763a188212cc5b" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.1.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.1.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "8bd56a6d29d93bc4c1d698342a6c4a54" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d17dbc38cefd3a76fc5c34f73e45e3f1" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.10.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.10.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "f01c9f2d50dd41a3ccf8128432663d84" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "fa584c5a17b80f4f3fe449e5e77bdf6d" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.11.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.11.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "9c915d8388d36acd5ed680ece11b9398" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0546abcdcd8adb6b4a074945c7ffc0b2" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.12.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.12.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "6c8db95f5cb6f8b12e5ab356a0c6ad54" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "316651889cbddee80be8de34c7f978d9" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.13.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.13.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "cb9ed2206010d6abc1677ea0d66c72e6" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9caa0cb18c7b2b603a93ac75fc12e35c" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.14.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.14.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "b0275852220b79d81932ce2f0d22d957" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "896c0a14f52155af65e77156da56285e" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.15.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.15.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "7850e3db3bac556811e1fc82333c0aad" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b644c283c6005af5909a00d1da4d5c05" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.16.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.16.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "fee4b0a8805927f68c12168b21f2de17" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "bc3a5d97504d3cab428d5ecb69620c5f" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.17.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.17.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "144e3a77cfaeb8d5d2291bc15f226d4b" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "bb32f2c096e53bf03b1eaee9aa957954" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.18.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.18.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "ec323d231898f985190170909866a4da" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0963cf2c2b772d67de252c801a5790af" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.19.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.19.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "9c67af5d02fea520753e9a6919c8a928" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "555cda5d3784fd4f82152d180e4d6c19" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.2.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.2.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "b8180885e088dc9f66f040c87ff88ab1" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6431d94ba1413b8c9c0252bb16c2f11b" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.20.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.20.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "a8b1f36506680ae57be9ef959f7f3622" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "68848168933bf0046267bdbdda565978" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.21.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.21.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "ee271be135c153f8cdd9678c5d814d89" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a4b14d94946773dcc241391cfa185663" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.22.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.22.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "3e164346d675380f4af03346fbafebcb" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "177e16dfe56f2536ce86e2e81c85de10" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.23.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.23.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "ee3325fe6f581c1830a1cbf6194df9f8" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "dfa9c1a980ee4b959ec768ab0a1a8c45" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.24.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.24.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "ec05de101a3f3ed1cb5aceabaf727518" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e7656fbb0f6746a07d93ed97969b30c3" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.25.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.25.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "225162cfdf8b83c1ca3d6e4b07df4fcd" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b686f60636e4f9ec1a8c48ee698a5659" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.26.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.26.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "9ca9908e19a0101ab18571d822d8715d" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c03e9922fa95411ed21ccb73dd2c6358" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.27.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.27.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "93781c44d2164c7c7543117b6e6c5c42" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1093a677256e20b2e49f77844e3d041e" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.28.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.28.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "115b5deca2184cd6b91a3a6cf9150907" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "09b528c64cdba29d029781f08c04a08b" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.29.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.29.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "34aa6c26c7b79979f217adcabed964f7" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "60dcd26a1418b6650a13e56dabdbfdfe" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.3.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.3.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "5641a4fe7c14806eb0509ff546cfa247" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b34235dda9db8836ca29abe8c1af7751" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.30.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.30.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "8b811ed94c2a688f69978e80a6b59879" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e6783356cf54f40b698430f25aee2a39" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.31.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.31.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "6a6db0fc8ae1b8e9e40dd5bcb4c5e878" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "fbbf292c9c6249179487ccb7b5a6071a" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.4.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.4.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "d0dcf8eb817303df5164699b7c5b6055" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b90fc6556e6c216cd2feedcbbea35371" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.5.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.5.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "272885b923f418e48fb786ed888c736d" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "64ec245a9bd39ba29350a2f7f4eaf7b6" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.6.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.6.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "5b8d6f00dfd4f8575d60b1abf018bdf3" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e7e52d7baec5a8c4b51f05ba79671b73" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.7.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.7.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "e83664b5a18b8cc3deb78dcd2ce9c0d6" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1d7b2e7015e729d283d211f3616d0b1f" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.8.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.8.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "fabb615bd706a9d2448fcaa1367a5197" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 320, + 13824 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "8c55d2182d926d57e7ae810ba616daec" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.9.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 864, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 216, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 80, + 13824 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.9.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "349101791fbc4ce671746fc5d10aa556" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 14755840, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 320, + 7680 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 80, + 7680 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 320, + 2560 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 80, + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.norm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.norm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + } + ], + "md5sum": "a411fe69de7a4b49ef8baebc55690709" + } + ] +} \ No newline at end of file