diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,10103 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 20481200128.0, + "BitsPerParam": 3.644827456917361 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "474cf35921f4a698f1efd3023739216a" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "5c419e10d2fab7a700286833179567fe" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d45e4de7f0f119a9e253afd43146b760" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f099952644deaff90da29d23d4e9bbdd" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "57e2b2694d1f5226ba669c976d09f975" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 48660480, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 48660480, + "byteOffset": 0 + } + ], + "md5sum": "90505e9fdc2e480e384f43a84b1da842" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7d49e8eff43f89d86e1428c293517648" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26583040, + "records": [ + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 10240 + }, + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8857600 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26552320 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26562560 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26572800 + } + ], + "md5sum": "59b0616fefd9daeb65718c60d5a5e57d" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b6b390d2ea435f9e2fba3cf0172ae529" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "af2e1cf7963114262f6fb577839887dc" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "b2e803a0bf18b5edd9a5744c91973793" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ab97664a260268f33f872ed6ad163e62" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b1aa007b21148de6a5a8257dce93ea91" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "970f898b8dca3510f62036d37225f142" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f4929aa128b7434e9ec143cb3bc7a6d4" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "7360c699a477e9f82feddb0904f6ed3a" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2ef5490888812af08ca35620b0bafde7" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "330d9570b01155401cfdb626eb39d1e0" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "896dae530ecfca1db76d9223759702d6" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "251f7f6e9225f455b388f18688ec2653" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "2dbab84545639011eba85b74b84ad7e4" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "07a4f0cc989705be7ad2630cd91559fd" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d6372e2d9ac081603b8ec719e52fe0d6" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "dc088360d5805820ed4826b78b637287" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "db3e089a68299cf5910f04e835c8b222" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "02a5d06f3b4c0b6763d34af08fdeea83" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "70956b0a96bd00cec4c42ae688964c72" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5b137241c546b3237880bb85a18a26c8" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "db44576e1999e1b74d978f038cedc3ca" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2f59233fceca8d60f511457487c683ca" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6f3918fe001fb1e93f639b3587c653ac" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f59b6286314d9a838eef95e663b0ba88" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4569191077ea8fc55436a8d77a4afdc7" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "151e2d5457178d5fd993e224c991803c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a6fc70ea36586453e02c7b6d0c787b76" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c6951eb9622b3c2a2dc0cfd5f84c6734" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f18430518757a7ed9ebd20f6960ae00e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "35dbd0668a396c92b4a3ee4f0148900d" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "d36d80d5a4f55abae000ce3d1c1f9c10" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2075d3978900334e8131344b7894c7f1" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3574b5b379569d1f31cefcc6c383a82d" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "a1272a7611fd919eb4733d4bf55b1590" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "59422487379ba39227995e2b89777c88" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "39f7b7f3b4b27a2a3ada75e548801fab" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f51bd29684c361e1f6d0117441584848" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "754cef6bc5a13873b3bd2236304476dc" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "53782f31534ca2a85f32d908c2152ed7" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3d75c3aaf2ea2fc0fea4cf5db9b69e0b" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ac1aa33ef1833173858fdec322e95b04" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "db5a098c2fe709004b8878f404b38549" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3c1a9d32c229bb044649e1efe9d0cdb7" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "54a58c920423ccf173abf7be6183cf2e" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8d1db6a18e02120c852f72624ee157b7" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9c64530f351d20bae88f6329fa3943a5" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a33eeeb9901c55a7f694c04e6c804b99" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 32495616, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23613440 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 23623680 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32471040 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32481280 + } + ], + "md5sum": "1f800d37a0706c2de9db9d9327553f4c" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "658437bd0acda3d02683fe3bb3fb52bd" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fa74b789d06637dc49dbd64ebc8f2527" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f6a5bec08b363fe6570cac8fa087e787" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "51958fe0b37ef6b3e26a291a9a2e5edc" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 28215296, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 25907200 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 25921536 + } + ], + "md5sum": "fd8c8560e3921d54d799174bc0674e68" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0d87463f196c71ec3a03ffc7bb9a62a0" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ebeb473494d3351282aab190f7b45f49" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c1e0ef676f50aa87f0269e2bf12e3cd7" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "826d08387cc0cadcf62731d8b43c44b7" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "e0c9abf0749613580e5f70a41d1c3c7a" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a39e106e08bc31cbff36f12af2eb7cc8" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "19b9e61eb74aebb8ad7fb37eabeb46c4" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "75cf73e6a83e3743426963ae3c9c20a6" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "24c3e3761f79878d66cd741113d05f3a" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "acdc7a6d242b0bf729f4e097e1c311dd" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9f6ab4e17e385ff3349f9d8c7b6ff018" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a60a7944045d9a3ed3df64e3b1c2ac4a" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "c82ba35e47df4d55eb9b87b3efd0df2a" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "3896e66b1ebb456361df46f328d4b1ac" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5a64b517566893eab824dbe74e104872" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "b94a2249243f305963b851cb0db0d8f0" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5f8d6c2db3448d74f1935a4e1516ec64" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b53e1dee704cc3e591e445d842cc88f8" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "3cf57efd04315023166901488ac54963" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "60a25a50b45a2bc1fb0320399c761ee5" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "db324bca265fc19c407d5d581b5b545d" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b3d1d322532ae7990f8f06623d7015f6" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "f0be6d3836e33c80b161c7fbc599d31b" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "711c5c47030ec68fe975ae181a3f778e" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f5aa304de7b645dc7ebeb7aa8deacdae" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "21b723f30946b849e841996ffc327b1a" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "3cea37ce9cb964676287d202e2b8a6af" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cd6f6aee2ec795af2b4204de7174959d" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c717d539550dfb8e677ddafc5662a529" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f6df2e8aeae22077a4d5bee04b9a88eb" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "86faf8c1718d42d6dd9cc549b7bff6ef" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6c57d40755fd1a0c71b9a8fda57c2038" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "58aaacf9049033c920cceac5890700a7" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "8a1ef9e55063e735d1b6ccd7d1c3a507" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f3b5460b06f7f927d699906433ee89b0" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "492db42912496705ce1557706d8af5f6" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "7579c24a1cc4940113fa11a443c1e840" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "22f5fbbcbfc4ad275b4507a0594e5357" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b1340b8f1b77c9d9285e1cb9dd6e87f0" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "238c44410ff731bd9461c645b913ac1f" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "8ae7c761ceb6efb091413a18fa5a7867" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "80aaa189232b3580f39f6aa6d5c6bd37" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "9fba12f0e106c699267997469aa8a320" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2c749ab6caaef0014bfbe5c6c7962900" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0f9ecbf1469014f89ad8d807087b842b" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a08b298466ca3f33b24afd4571808aa2" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "99119024e2e7bedc6afc37b89e47b286" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "09a78fe26541fb2af4499dedb48fb8cd" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8a743cf34e28a98abd68eb9b115d0ec0" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b5269cadd81ca49464c5b5603a508550" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "137d16676a58583910d9f701828aadfc" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c1de0778b15cf4731b0b09b00de1785b" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "5f36bce8c86930a92358f6dd821e28b5" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b7dc6524ce3b85d0c2d8aa92b70d318e" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "77f9a80b21eeeccde1cbe7090d230719" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f6ea6ae0a452da1f824aa079d1f4e7a5" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6fe836f0d5fefcc5551870fb2126091f" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "b9434cfb80724cabfd9703f0a7391cd1" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a68e0d06698c042bc51e6fcb4340ea6d" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ee8d5cf73140cb6bee3fd2f926e81470" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "bb704cd48c1dbd461932ed83a1263d40" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6c0cbbb1d8b13845a35fadfefdf59c65" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5fafc800f14e408bc474c5107ff7f1a7" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "b919b2250f524d9c52b29a769ba267f0" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7ee4ffa72343b10c1d571416659c27aa" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0bb4b55a423c5efa131d9c5e5965b7fb" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "002b34d2633f84f724f2e7c161b2ce29" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "40f7c45fbb4d287bd7f681a1f754eb83" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7ff3957a437f0697f444e83dc24fc3b0" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "15dedee2cb09796bb235afa900d20793" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "9e842dcdf6f47bdfefbc955b861045af" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "28bfc2630cd401347071b9e73f90fb15" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "23bca087ec1bef7eb1ca8aafe20215ca" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "468cf8a111f9d43963d7fb0eb59fb370" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "59157f787c21f161e82e8537d38d53d4" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b10352f6f9ea32f376fd6159fc7eb298" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "a0a6fbf4f5f7350b1be1c90d2202b564" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d6fa49fcaf68c7dd9c34bd75feaf3dd1" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "de1a62fde33efb58a2ee483d062b904a" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "74d659d00a76183000642f04bdc4db17" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d945649937231041b6f8fe83be3a7f61" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "9aa80a06c5d88b1adde42bdaf933f140" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3c88833b48306e500f9f3bf1adb3eb02" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "81da7116cbedb0c398c3b229217916c8" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "d12bc8093cab3f48eb62e1f45534e2ee" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5569687627e500bb6e27664a5193b4b1" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e626d9d7fe6a79b61f26b1036f0a543f" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "9eafe82fb908682a40f43d0d56ab6f66" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "be449eaca184b62643424b4330e1da9c" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "42ed31ae8874b5079f13ba54376083b9" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "36673864f7bd2a3027b854b4277f6e25" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ed9dae59da8c8243bc33d9fe1f6fe827" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1844e6db8bd37633d4aa971f61265225" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c74fce8fc07c6c247100fb1ca1253928" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "58920d63c67a4ba9f6451dd80e79faf6" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "a642a191c91fdfd169c88ff3866a04a1" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2c7f5cf5911559bdfba8e82dea9dae8a" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c4f0df22713b7b9504fa10f5f1662902" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "8c433f216871e1c22a5ce193730ac762" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "50c720fb75583e22fe8dbe326df9ccb4" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c51753abfa40fa03d48aef7ece43e13d" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d34dc269f27b17e009f76583320b6353" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9933fe01050d4374f44d6567d31c819b" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6042497efec341607f91d1dbc813a870" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7f3b04d3c77f8cec775be59dc71ca8cc" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "255d4f21dc7260421acda108fa893c07" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "d5e92e647ca362c0b238f6c03e43ff7a" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c0b767259883c0a14ef272e87e5338a4" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "95e2eb687503e55555b6ea7ca0282b52" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "ccb5fdf69c897f2d46bc55b4c5c185d7" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "35af7d12d180c5e901a1f0732edbd0a5" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "69723427eb33003e0e51cb3c1b31bd14" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "252091ffd1c06c71be41ba6dec923625" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "004db4998f48b618b35daf40e23e9650" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f6ea1e19eb606c07b0a80b35e3db84c6" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "45ee40609e725e711368d90ab1415439" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "66ad1ba3dd2d08f1379c289403573100" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "777ccddc5ac391871528512a4a6b5293" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e65eac398ac72ca3639fe0d99f7c4225" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5b2de47d73d47f6b7be06553b41a7743" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "fa52a169d099ae90681ff338e516650f" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7d6e68508685382ec193405a1b33aa06" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "f6c2d6ce6697abd5f1d3b9d56fb15e20" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "46c192c8f7dc50f53452520d84d1a6d3" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ba89809265ffd62c034f73f8da3e0706" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d72ecb9f1a778bb39d8d5c6d7d11c61c" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1439d0f9788e31664f1ed56e854fcd0b" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "7577579e71fd80042ad98670c7c92ca7" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d95b57e323b13a2ef695b33e77ae959a" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c90e600e4a6fe9566d87d864233f905f" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "d526f5ce987cb120e8ca6707f2e8d6a1" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d1cae5826378bc01a8f739f7b0149a73" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8ae479cb13eaba198c17e7768a891b4e" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "3ba011c68af665131d23911f96d01da0" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0dbb6ada93ae302b3581d7ab8306018c" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6c16295e564cf0e7d86e1d0d5c448d5e" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "6c01712ac0276a74c9df9b70cdff9c05" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b35afdd322e86b8c08cd595b5b1b9a9d" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "996e266aa824b6d5d0abfa028a4575b0" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1b375ba6e86cb5ed84e29ebbc3a23823" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5fdbc1b2bae6a02c56e7970b04bfaa2a" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "cbe2e735a0cd0ee96ce3740f657fcf52" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "c7d83082812a8136284562e7e8be482f" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "70d294267285411eea04cb7d73d90f68" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6e46f6f532e3c1274b7487287d43f513" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3211c1d243bb35dab7fa0a48b043466b" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c17f77d59c1a1c1f374fcb0144b54dd4" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b5815478deae5efdb2f7d9509f9d0b01" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d650a7ec3a07a8cfc8939d3761910ec5" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "b006d8c6d39ebf18f989dd11e2c4704c" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c8ed2e0d7adcc7a823003163824e4b80" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "ba26154f7b35cdba004a2712d0b51d2c" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "87879418a8c85471279f1d14223da5bf" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ec5c5198a73b3234a7073a5001a28b43" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "3343428722209ed709b5af4f3144d4f8" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "66c8465ba97a87f96b74be3d94d7f564" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5ad0c86cbbe9169bbafcf8ab85874dd1" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "db58575917b266e6f8e4c80894e40576" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e755333fe08f40695bd4548a9605ef83" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "108be907978c3088ec8f86f59bcc4722" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "4720a82dda1ad1ecd8b3ba20de6ff44c" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fcd72a3a5c98ebd460eb11afa47d9360" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "6660968b4e9a38bc3b359dbfbf94cc21" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9a276f5ea282a98389de3f1ebfaf8287" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ee30d093f753c9b8746b886758026b27" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "33eb8c490aa7e414b4dc9b4d1eaa1209" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "eefff15e54b6da3816b045648ce5310b" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "faa64861be464950da1f9528c19c2f19" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6a055af24dc11e6d0c55d7f0647547ca" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8f880893d56b7e5ac37b611b724ee2ed" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "65cc9a4fafc512dd5e277804a0e4a724" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5540c71947c1a85c8a9d7cbb4d2dc6db" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7a0497699534a410d9a798ba8e5fa8fa" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "e42f1e894b53e4b624944172468b3683" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "04cbf86c40530cebce2c1ef8e7273207" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "19696c9aa2733a84381fc7169601aca4" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5115f42cd1f9fc1749158f7319cc6a87" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4de0f7f3a9e90c9f446e27af5520b673" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "e09bf32bc06d6489fe0c24ccafa795f8" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4a1a056b79807b9b47593f3f3849b58d" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "79a7b3ca1b4460aef033a97e6aba1893" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "e604c9b0523b44d1b6db12b49386a806" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d1bd2c63c8cfac9c7cdf5ff91a962d6a" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "953fa57e025850182146ad0b8ac6aeef" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "c8c221cd0dd7e36e41779b303d1f2741" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9324ede2ab41ebc566476e9705bce842" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7f852204a223ace71f9bb3a2e7816c2d" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d76f4d7be583a3b720c1a4191da01544" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "fd7cdfbdb46af5dbf41e12208dc737b9" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "cc69bbb39c00e9217f885bbc73c71926" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2ef7eb6a41cdc52963341dc08fc35355" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7d665eca2024535b7852a5dd2d28c70f" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "6a3c08b9ee9002fba3a4b8d57e40fde9" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "80791b10b536bf2334fdb760beb06edf" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "922847a1fef8715811604db80f79e447" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6b49a10e5bb5be45d10d30033b97f2cf" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fbc8e5d6202864f59f443c67c51921d7" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "55c17794cfadc417b2e02c8db739a970" + }, + { + "dataPath": "params_shard_259.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "2f3d23715ba663f1c658d0bd544a409f" + }, + { + "dataPath": "params_shard_260.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "2610da571b63385cfc1454a4607586f8" + }, + { + "dataPath": "params_shard_261.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cf76d65025c760c15cc1b4b986a95f55" + }, + { + "dataPath": "params_shard_262.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0bbf459bfca485c0fbc16eee2e543871" + }, + { + "dataPath": "params_shard_263.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "b22aebc390ebfd1a8dea537186207d1a" + }, + { + "dataPath": "params_shard_264.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f4db3aac649cde63c0d3b371e380f089" + }, + { + "dataPath": "params_shard_265.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ad5281f5a08b9a608a176d272de05fa9" + }, + { + "dataPath": "params_shard_266.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "a886f47606f29f5308ef8b76bae29445" + }, + { + "dataPath": "params_shard_267.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f7ff04f68820cae790c19d4b9ffbe4d9" + }, + { + "dataPath": "params_shard_268.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "51595640cedf7332e7220556042a942d" + }, + { + "dataPath": "params_shard_269.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "39549857e62db1f5ee64776c6587d565" + }, + { + "dataPath": "params_shard_270.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d03a90b5a8b1aec14ca9c6c50dcc25a2" + }, + { + "dataPath": "params_shard_271.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fda1045412ddae142e915f9cc7fa193a" + }, + { + "dataPath": "params_shard_272.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f51ed539df7a5f7933c9ea6213ae2aa6" + }, + { + "dataPath": "params_shard_273.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "e2e314c21372dcf905e020fbe054e3a4" + }, + { + "dataPath": "params_shard_274.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "cbea7ce1b1ca7d7da50d1d7598d37df8" + }, + { + "dataPath": "params_shard_275.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8dc9538a85302aba341d57db58b14c19" + }, + { + "dataPath": "params_shard_276.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a6d1a544bd45ed81bf5e30685e81dfeb" + }, + { + "dataPath": "params_shard_277.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f9899d15fc3d1b970d5c41a2a69afd2f" + }, + { + "dataPath": "params_shard_278.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "19e10b9e21dc6fa483cf02d3efed7144" + }, + { + "dataPath": "params_shard_279.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "c58131094a6af3ccbf97fd5639d222c8" + }, + { + "dataPath": "params_shard_280.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "ae8ca4554573b23bcdbe56418ada72e5" + }, + { + "dataPath": "params_shard_281.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "008656dfd04eca1eed1053bad0ac602e" + }, + { + "dataPath": "params_shard_282.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "97a3b5a114e2cded562d0abac4c541e9" + }, + { + "dataPath": "params_shard_283.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d20db9d869c22cb1300739d46a14319f" + }, + { + "dataPath": "params_shard_284.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "b8ce91061b026ba5babf6a9220252b2c" + }, + { + "dataPath": "params_shard_285.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "79f150a82807c8d0c366eb78d2dd10ce" + }, + { + "dataPath": "params_shard_286.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8bff366720197f7ef436c34ced85cbeb" + }, + { + "dataPath": "params_shard_287.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 14745600 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 32440320 + } + ], + "md5sum": "12426f291e41807256dd26cf42b75eed" + }, + { + "dataPath": "params_shard_288.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a06f97dd17eb9d6134d63d237e0327d2" + }, + { + "dataPath": "params_shard_289.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1cf743137a2de56fcb58e5bcc4dce9a1" + }, + { + "dataPath": "params_shard_290.bin", + "format": "raw-shard", + "nbytes": 25917440, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 17039360 + }, + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 17049600 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25896960 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 25907200 + } + ], + "md5sum": "0291319e51f6b1104d534158c323e72d" + }, + { + "dataPath": "params_shard_291.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "011e248479d732e1d1d67f4385b5e656" + }, + { + "dataPath": "params_shard_292.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5bd75190cc1e60f1f9af73273ac78941" + }, + { + "dataPath": "params_shard_293.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 0 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 8847360 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 26542080 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 26552320 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 26566656 + } + ], + "md5sum": "643ef38c77e346e9c9bfd82c0e5d8d86" + }, + { + "dataPath": "params_shard_294.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8272d3db7214124216a1da7ae8813848" + }, + { + "dataPath": "params_shard_295.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9c6071f5b8f748a212a48df1b91fb71e" + }, + { + "dataPath": "params_shard_296.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "aa2ca0be93003f8c1bd010b4c4937d58" + }, + { + "dataPath": "params_shard_297.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "191040523f3921da9e6ff79685f558e3" + }, + { + "dataPath": "params_shard_298.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "3aacf7ca8cda7efed4d98f4d00043c8b" + }, + { + "dataPath": "params_shard_299.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "aedcbf793830a61937a79c915224ca60" + }, + { + "dataPath": "params_shard_300.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f33dd19447af7b527226f3da87ff40c9" + }, + { + "dataPath": "params_shard_301.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "aef690d2ff8ee3ab9e06cdbabd55b31e" + }, + { + "dataPath": "params_shard_302.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ba37594337bcbe24871d5ae685ad9057" + }, + { + "dataPath": "params_shard_303.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "daa90c3c877001ecffb1204c59b51bb5" + }, + { + "dataPath": "params_shard_304.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 5120, + 3456 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "052a9a1ccbccfd5a5b9d64f13cdee5a7" + }, + { + "dataPath": "params_shard_305.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 55296, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "b47aefb1a9a03c87c30f0637d222eeff" + }, + { + "dataPath": "params_shard_306.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 55296, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "041484322413acba834a22d6dbf3d7b9" + }, + { + "dataPath": "params_shard_307.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d94024b71be07b8d5f06d51f59567e20" + }, + { + "dataPath": "params_shard_308.bin", + "format": "raw-shard", + "nbytes": 25921536, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 14745600 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 5120, + 864 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 23603200 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 23613440 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 23627776 + } + ], + "md5sum": "e0a600430f883a49b91ef83998982d3f" + }, + { + "dataPath": "params_shard_309.bin", + "format": "raw-shard", + "nbytes": 33110016, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 13107200 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 14745600 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 7168, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 14759936 + } + ], + "md5sum": "10c8b60ad7c2f421ccbc8dd996a4a3cc" + }, + { + "dataPath": "params_shard_310.bin", + "format": "raw-shard", + "nbytes": 17039360, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 7168, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 2293760, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 5120, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 2293760 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 5120, + 160 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1638400, + "byteOffset": 15400960 + } + ], + "md5sum": "6971b2f4bdb3d80c83e473597bb127ec" + } + ] +} \ No newline at end of file