{ "metadata": { "ParamSize": 260, "ParamBytes": 11181772800.0, "BitsPerParam": 32.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 257556480, "records": [ { "name": "lm_head.weight", "shape": [ 50304, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 257556480, "byteOffset": 0 } ], "md5sum": "99eb0ede31a902b93147feb99d9bd47d" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 257556480, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 50304, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 257556480, "byteOffset": 0 } ], "md5sum": "1420f5360de15c3a6f20ce7769219710" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "70ecf379e529f532e184a114fa5f7c5f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ee1ad9994c5f70603a9e8c9b183d808e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ac38295359989cb809773c375a2678ae" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e1c1ec2ab45426cd7e4f26505c50a778" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b583b8645e7384a1216a8184115fa2f2" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "35954e59ad9beb75790cd262d64dc677" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "02d233e2e4cdbea8d043ff441cb09aa7" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7566580833a0a4445f2041f65fcc7e66" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9935d5d7f8c20a9e22b0c3c011f5c6b9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 26275840, "records": [ { "name": "model.layers.0.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 5120 }, { "name": "model.layers.0.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 10240 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 15360 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 20480 }, { "name": "model.layers.1.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13127680 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13132800 }, { "name": "model.layers.1.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13137920 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13143040 }, { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13148160 }, { "name": "model.layers.10.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26255360 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26260480 }, { "name": "model.layers.10.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26265600 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26270720 } ], "md5sum": "fe1af670d2514a5ff1cd60cc3ae3610b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "568387cc5e4071a81d719bd1315dc46f" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7b8ce4f2a356f8fd77811e50ee3494d7" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "60be05c22284c2e5afbaf31f12c0f227" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "df84937ec8af3f250774a20843018acd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e142f525e885f7653a2260ec17d304e3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "4c509399fdf16dbf48f26edb6c759114" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.11.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.12.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.12.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "f2e3f540212bca3f4021d2220196b1c4" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5869255516b1bdc16ee7b009a7558e61" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8390fb3651f6fafa43e6d13250da459b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c7d80658bad27a2075ee2909fbd45c3d" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "39531349b116301e1a534eee9d5cda18" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5f09cac29ba1c85827122cf43405db26" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "86a03a42b444179917b62c49c8299a8f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.13.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.14.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.14.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "9068a4ac81157c0b8b62cdd5fe66f4a2" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3c0b80880233f0ff152e4ac482eecd80" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7645abb618c7898d9b8c9ee5e2154b56" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "49b8b1e8adc93dcf0093e7e8b2db4f57" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "52934a944067c74144b12dc9432441b9" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6af97de98960425df08a492010542aad" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "705332bb90492da14aa53e33ec6f1fb1" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.15.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.16.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.16.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "68f6dcca292e731bd8f2b48de4bd170f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "39e7225f77dbc9b4a45474ad0c19525a" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "902d9adf4fa729bbafae05769fec711e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "53f3ebdb365e59d52e64ee950750f7b7" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2012ff05090ef906647a7652cdd16fbc" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9df1fcac9475d8074e4080f71ccc2c4b" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "acd9b665f7a3dfdb0a964bea34692eef" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.17.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.18.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.18.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "4c97193f1661f62b7b34a83821ed89f4" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bee738becb48f282a724c6d10264222c" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c00819fb6e1e5561cca84732e8c688f3" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "908e4f1882b297e80572f2c6b3c71be1" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9ca8cbbcb9f8cb7e9aaeab45c236d6ed" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7a7b92df65ab0131a90b6375955acba2" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "c2ed32867ee9e210ce73781bd3df651c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.19.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.2.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.2.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "a431061920da8aa07191ad1ef06ae585" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2cf015c1421c06236ea0e414c579c158" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c8a809f8b260662991eadf2c89afed0e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "eb442f5cbe2dd408a042ff70b6d1d78b" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a127d63d90fbdcf0295a6314a59c38a0" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4ac4dbe8e38573eeec2fc511e8fcfe55" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6a63aeb2ed620bebeb8d14a5db9dc50d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.20.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.21.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.21.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "04c4ab7b78313131abcc066f8b8f08d1" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3e13d352a63d0c2ab24dd92776d9b9d4" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "89416d3251bc5ab021d4cde8389df226" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "ad60f34e9470803a590668d38f571cdf" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "4130d4f06d3af7fdd00060bd66f9e9c0" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e8191fc3196b458a2682d8001a6a6a62" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "307aff9e249c46c654d486542f66319f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.22.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.23.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.23.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "2cc1fd120e8f0b22966e0f28167f3840" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a4c25bce39dd1a29c6f03c3651ca568c" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1dd537b0185667f3e562b6eb24e35257" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "9d993ab727558107c9105553865ec33f" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3a9f4dea298cb5fc728cbc114d9e0a08" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1567f5f202134f1b1c872f8bd60f3cc0" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "375f6bc8930133672f1184d261df3db9" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.24.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.25.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.25.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "b9af6516f2a8fd68e32f6f39c242d9f8" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "696f254d8ce817a88bd0314b0000ad0b" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c81f42fe2250842393845dda77908bad" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "438b307c84f6ab42eb14bffe65395de7" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7517251998df10c79e89b38e3c65c45e" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "72dfc3068787875d939c848b304fb151" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b9112ad1aa82c521bd4016390ffffbce" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.26.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.27.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.27.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "f7b5468922320ec7fd16fe1a8de71e67" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b44012f0911bd7a6eaf3afe8c620d773" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7ed42bf7d604bd9bb931bbc76f3b0876" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "b8414fe2548b6461e53d96e5ada1f06c" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c2d18e497f0e7dedaefc5821de33a2f6" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fe0caaa9c12491490e0cab979afbfbde" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "508ef01762e88879118270e32663b0b5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.28.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.29.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.29.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "10356d54769c1cf4daf6d0d061e7597c" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8a108425106fe155e99ba85b8feeb0c7" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "109335d942b6b32472d978fca6588fb8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "84e4df98dea25864908845db2a24a59a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "ba41ab48227b018fd1ac9d8f4b907552" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b8631ee37d36999dae3cfad9289afc1e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "1fd45eb4e9cec596381ff42208720900" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.3.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.30.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.30.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "7f61fad719562a9d2ad0a0b85ed57a2a" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "33947ce2249c4b5ffaf46fd7d67c5cf6" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b7de03cd37f1c00920ada1a37ed7d3f1" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7e0c3cc7e71f02cbf653e1274ef5a8e5" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f5f2724c28985d25a65242b42ee0ac2f" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4f7de00252f2fe522d59366d5d9f9afc" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "7c6c5740a6068dd6859ada709e37cb90" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.31.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.4.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.4.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "893f7edce6b6c9a9dcdec992491854ad" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f52a6c84a519d353902ff607c518f070" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "74a260d6e729686df3d796b17118c9de" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "07a2a84cfc4b5d3a53bebc8c0868160e" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7b48aeb51b310a06fdf40f43b6b8d0c1" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "734f8079c35b12db55465490310212fb" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "085e9c636cc0d78ccf0d7805348a3061" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.5.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.6.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.6.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "d15703d436cfeb81200d84e323d5e6b3" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "370c6a2766d203f1dafb567ecee6d851" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "04dd3bcc76eccadc6235fa8ab8cc3236" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "6a624b50b66b6b48f90e1313da7bfbe6" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7fb3fc9f444dc78b7400ff30e2efba94" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7984731a769cdd06a6f3f5d7dfbc2a04" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "34d89066ad1a714f3eabfb6c5fc2bb7d" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 26255360, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.7.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.layers.8.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 }, { "name": "model.layers.8.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26245120 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26250240 } ], "md5sum": "b4ca7a650917185cc46521475941d514" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2560, 6912 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "aee760bf032dec0a6a340953ee7731a7" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 13824, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "beb2232023d1f19219e26a51a1b92afd" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 39321600, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 7680, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 39321600, "byteOffset": 0 } ], "md5sum": "caec1303a946754faffccc92b1254e4f" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 26245120, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13107200 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13112320 }, { "name": "model.layers.9.post_attention_layernorm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13117440 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 13122560 }, { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2560, 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 13127680 }, { "name": "model.norm.bias", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26234880 }, { "name": "model.norm.weight", "shape": [ 2560 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5120, "byteOffset": 26240000 } ], "md5sum": "dbfb209f08c627381e1ee1069911ad04" } ] }