diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,5382 @@ +{ + "metadata": { + "ParamSize": 370, + "ParamBytes": 54454256640.0, + "BitsPerParam": 14.686606395927337 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 2359296000, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 256000, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2359296000, + "byteOffset": 0 + } + ], + "md5sum": "dc36e04174ceac3d7d23463b5d0d5181" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "4f0e853727e47613806ffa9c4c482ff2" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "21fc9a247c7e078674e21f55791089e3" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "7b6c03f580fe4119384785ef11ca4387" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "0a4c23fe136646c69827d68a51d94fff" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "6b485dacf260f4fa5690ad5f9c056cdb" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "72d93a6fa7ce8ef45f7e92291de659a7" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "faebe2c0b094d27995cea1fb726b162a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "b7486ddadec0d8c5b2cdfbd015070d84" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a29efe9166112e96288dbca63318d224" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "d6c07888f84c4d7b9207c8af8812c8c8" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "cc641d45e6a9f09da1abeb52f359a23a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "f04105260c6b8b5cea9222a47bc2eed2" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "c88be0bc9d4893fcd7ec78bc4190a4b5" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "96bc024d0029894355a306ed1ca2c93a" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "eea52126cae035d5862119a6130f4b95" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "c35f2943c085189db06e8fa5fdefcac2" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "e2f728f041fea40538f97df2a74b6a61" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "53977f25830b1d83063393cf6e06e11b" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "1d7c1b03ed5c22fd10e07f222ce40343" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "2a9e7c74a08e7d4a740ef5cc1d16e1e4" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "60f7265756557068a21f4a74a00e5065" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2f08ac47e48250c696cf73e100725deb" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "0f343ca21f549dfe46fc541759f83444" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "3c573cb4a07c2dbf1ba61c2bbdbe7da5" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "f953f21956b50b4bc9e2d1b584883141" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ce383a398f02af492b0bc7b8eef97aaa" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "06b9fa6b136e7cdc917af3a48d5f696a" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "fb73d3cf5113e78ecc793631c659d8d4" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "6f0b10d5db85c567deb5d966759af0cd" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "fe0ae2bbb18c6e0179cbe97833c5cd6d" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e8bc0f6ed5fdff9f85e6ba0284f66b01" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "8c2719dbc5fd7513a948c92de098e927" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "2380c8423932cc6b542b0574b83200c2" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "196e535102df782e482ca3147cc1c127" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "d047b7e99b3dd4be9c7c7faa6798d5d0" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "9d4564c6ca52ada14b4630ca4be84c3b" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "874c1a06468f373519867ffcefb370cc" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "e4d9c2ee9c5e0103a51777f02ce8eb0c" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "0cae21a4e33d59c1ec4539eb13568d37" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "7f50d6caf5e55da261262ef68a0d793d" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a76dce23ab85b669e0f2928bbbf0867c" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "c8480558f1f90dad78885e50b97d6c48" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "1b885b2200798b7cfd1879572c59352e" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "77f9abf4964a372bc69d2d3796e2589b" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c59b3117c62bda38b842653d6ceb375d" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "02d1044d98322c2a291a82cebe0e7857" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "906edd6dcbe6278695ca0e848c69e583" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "5e9554edf16196c92858917406ea400a" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "08942848f84502d3a9872e335ca3fec2" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "dc4db64c4ae40646745079d1a6c45b97" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "4f77afd133ed31ce97d662c303318f8b" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "29aaabfda8a389edea789a19b10efd58" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "876710b458576180c5bdd37723a12cff" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "edf947b8a8028c4e7278f496746a3d69" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "65a0be861a8dc6fdcf2e7c2353637e3f" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "f7c84cdbd8fcbb6624d9bb17252cd1e2" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "8a1f6df7d173c58177b6ff49e44b6f60" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "2f7c9d760ed57a0a963a975739bceba8" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "50c66672f6e350aa32133092f67e2325" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "81798ecdb577f945f643e4e41d674c2c" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "ce6aaf2fd04fdf82e550b9c3b3b0a986" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "697236882302cf7618eb6a231156af19" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "ccd5b13f6da68b770003bae7acd90081" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "87f8c1eb1c0635f3c6a45fdedfdab1c0" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "0839d1f19482eaea044426792f2abad2" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "b1f469791c6f561d7ab5378b3c573b68" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ad5e4eacb1548fdc4b2fa0c884cd2b5b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "056e0b725496bb0a1dee4375e1f60900" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "00e7aea26a88ccb6e27fc43ee0b596c6" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "632e2a9805f500bd67b9c8b46075ec96" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "a19d25593152024db2cdb58653521249" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "e1521585a47d49646f32f00cfeb28f13" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "f7b5cf942e2ad27cb31b3d86bb934740" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "bc20a1b9ce0d50def914fa3283740755" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "181b6f0f812244ed28f845443394c2c3" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "ccc07720caf4878f8ab28a00247574eb" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "6e880ad3a769101bf5d42b3b2bd30428" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ae6d7000e8302c78c975881680afb875" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "f0f8c0b3791755ffcb3e2bab3629b32e" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "c6a1621ea851755cc1c676c40ba8396c" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "8ed9f65c0b647b2794a456560a2a22e6" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "58e2d4f9838fd2ddab6ce01a60a1c025" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "a6b70b5bafedb22e5e6355741eb50fa3" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3093327e75aa189e4c1d62964f796cfb" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "fc87c5e22378d6e9057ffeee62b139ff" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "2d7be78a34ea444d640f252063277c3e" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "7d86ef778cd55b79b4df50d4c34e8deb" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "3a65d46a8dca4899ac05f0a2e382c3dd" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "a60ab57c6d1ce3da7030047c7b491e70" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "f27c1dc563fd7050411badbbd667e1c5" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "73b24ad60ab31462446ce6602fa1a44e" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "e831fef8e15fdf98ea271c1f80748476" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "f1d2b99785dc024c66e656607780a26d" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "471f65297ef8e62ff152af3fbbcb29af" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "9f02c1eff8688c1446ed046a4e94d294" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "299eec1f29838a1619f651b8ac2c2d2a" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "48ee8aa14b6bedbf74ea626ccc6b66a9" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "1d8c70a2fc2694d951a55a7614f2ee8f" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "f2c72b1c908d4fe944fdab3339eaf781" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "15c479aacfa31f1227b563789686152d" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "24de14c07f42cb9d7142f9085470c02a" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "561db5e2c4e9b20a00497d459e65fb0f" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1a8a3aec61c5bef5e6e7942fc2ba31b0" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "d4b800d82bb141c8a3f0531e7dc8362f" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "5f1d312f5a8aff7f31d45f49dba6b47a" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "a68eb722f2505b2775c3b27591c531d3" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2e82c8779b7d5498268a303a1fe33c42" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "d196249f35fa960330bf61163eaf14e6" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "4915ebff7e72c5278a55d0525ae23bcf" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "804a60e642f3dad08c7fcafd2637a225" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "55b2212a596e3f28e9ef49f4e1d58f85" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "04ce72e3c3f6f7aa3771e2c9941f9092" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "99482aea32b7ab638410a48d1e389f61" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "b0e38e30df4691211db0690a94039541" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "628902c3c3c8e71990dccd546e73079b" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "0249fa2584c4e66e6217d6b4f2aa673c" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "7e7ded135f939f346bdf016d8457982e" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "d33319299cd0ce8206cb31d488a0d57f" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "69703b0f869124b81aa722f5455e240a" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "535ca2d671d58d7416a81e4843331b09" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "1b2c8133ed20173bab22eedac23723c2" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "b47ca8b0c1b07aa6165cb3a6f9aee351" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "05d454a922c916dadf49c4c393f330fd" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "63856a22d3fe94c5e7b826a664d8fe16" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "f189245c12adcdbabb594dfd63004834" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "d783d0c7f7ce0d9ab67ca8cc88683a72" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "37b47716e6cab80d51d2ef22da02423a" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "93136414d590df68e40cd9b7c8d54642" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "8cebf3bf42c0be0fe4615e091045e5f1" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.32.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "d4a7221be2b3875d63bc943db6c7e9ea" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "51482b34c93adf5591e4d20eb2fe9cc5" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "a938aa19af2ac5e1a7be4a9d62afbb4a" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "48de06f22b7d9f7e1062969b08f59bee" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "c3cd145b1d52e5a789bacd5852d757cf" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.33.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c5e75c19618091f7cc441e82cacdb178" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "a9328660ee78726d415ecedb73a05595" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "b5400774e11eb7479e59682fcf48ca2f" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "a592b06aeec1f5c5e9b8fed80352647e" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.34.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "550804e4f5d0aa2c564f8a6d370914af" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "a535254722180c0a0e340615721be681" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "d19ca4e5552327dbf359da82fff07851" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "d84a53918f7caa3fc1d55e90c5ff3063" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.35.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "75ff61a4d93ae52a38cee962586e09d4" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "4b3d7f04e78cdea4fd912782085059fe" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "2f77cea9990c9b182caaf01fc65731aa" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.36.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "bf682f566b1f0f005d98b2165f706985" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "f6e31dad96223a2203e3abc4aeab63b6" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "f57d8dc2aeace971196913bdb3bb20a7" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "43f05564b30a41d16b74a9a2a58461c5" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "15ef6e151987113b53eb6b8bf553cb93" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.37.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "ba6c983dcd13cb7ebe67393fddb18ebd" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "89b326f0bbc270c5d848211b33f75282" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "6d6c98526eeecee414755d1b9eb218c2" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "efc27d1bfeeae5a55d187023d814fda7" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.38.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "89d55c72ca5f2455acdf2a47c3c206fd" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "53463953a04a04caaa844b11a083b559" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "e2f625fccb46df5fe8783092cf1f38b8" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "4e18e425f8893842443b76e0f065048f" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.39.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2d936649571d6961692ddc69d0a5db70" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "bc34cc6fe4a37feac389b36fe18dd2d0" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "03831ad4f8f9ae173b2f5c7d07dafa1a" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "c8e9e9fc07c29838d7c8f03606f6867b" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.40.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1e11442984dc3747db6f25e2bfbfd5d2" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "6b4b772980ae3f6c831e1cdbcb29d787" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.41.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "1ad9afaadf62bf00e595cdd08e203696" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "892bc0ed228f6ffee8747838ff18ef18" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "b7eff25eec698c74beb6c2e815a5c979" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "80d03d9a7dd3d5cdc45175f715abc76f" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "35370a4adb178622ed43e4d56c5c934e" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "368303116313404fd25eed9653c855d7" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.42.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "47d4437856896ed12b829a78bed5a6eb" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "5adebc49547de2e6f7bb6851b03f028a" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "2e6dcc7e7418af3d7fb92d5aa42c4d08" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "432b8c18203509aacdd770908d566e7e" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.43.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "6f0c116284eb0dc45ef46dad7746aa1c" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "f6ba3f7b643bec7f8446f04217afb0b9" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "e398958a8be1e10a8478d1d2e15fb14c" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "0c38211e4f037b8d63443123ec8d8496" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.44.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "c58f41bfdbd0c5d432dd75ae2225c817" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "1fa5a61b58c9f02732d4e1463d092599" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 679477248, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 73728, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 679477248, + "byteOffset": 0 + } + ], + "md5sum": "68b213f87a3df7334432a82dfd68ca8b" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 75497472, + "records": [ + { + "name": "model.layers.45.self_attn.qkv_proj.weight", + "shape": [ + 8192, + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 75497472, + "byteOffset": 0 + } + ], + "md5sum": "2b47a8861441bdf53f9dac6ce02980e3" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 37748736, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 4608, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 37748736, + "byteOffset": 0 + } + ], + "md5sum": "21bf969aca87c0a3b40035d5cddf4b68" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 339738624, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 4608, + 36864 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 339738624, + "byteOffset": 0 + } + ], + "md5sum": "dbd18d3a38c3bad9a0b9198a90fa9760" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 1704960, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 9216 + }, + { + "name": "model.layers.0.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 18432 + }, + { + "name": "model.layers.0.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 27648 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 36864 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 46080 + }, + { + "name": "model.layers.1.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 55296 + }, + { + "name": "model.layers.1.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 64512 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 73728 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 82944 + }, + { + "name": "model.layers.10.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 92160 + }, + { + "name": "model.layers.10.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 101376 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 110592 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 119808 + }, + { + "name": "model.layers.11.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 129024 + }, + { + "name": "model.layers.11.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 138240 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 147456 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 156672 + }, + { + "name": "model.layers.12.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 165888 + }, + { + "name": "model.layers.12.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 175104 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 184320 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 193536 + }, + { + "name": "model.layers.13.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 202752 + }, + { + "name": "model.layers.13.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 211968 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 221184 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 230400 + }, + { + "name": "model.layers.14.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 239616 + }, + { + "name": "model.layers.14.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 248832 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 258048 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 267264 + }, + { + "name": "model.layers.6.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 276480 + }, + { + "name": "model.layers.6.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 285696 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 294912 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 304128 + }, + { + "name": "model.layers.7.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 313344 + }, + { + "name": "model.layers.7.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 322560 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 331776 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 340992 + }, + { + "name": "model.layers.8.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 350208 + }, + { + "name": "model.layers.8.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 359424 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 368640 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 377856 + }, + { + "name": "model.layers.9.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 387072 + }, + { + "name": "model.layers.9.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 396288 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 405504 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 414720 + }, + { + "name": "model.layers.15.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 423936 + }, + { + "name": "model.layers.15.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 433152 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 442368 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 451584 + }, + { + "name": "model.layers.16.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 460800 + }, + { + "name": "model.layers.16.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 470016 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 479232 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 488448 + }, + { + "name": "model.layers.17.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 497664 + }, + { + "name": "model.layers.17.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 506880 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 516096 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 525312 + }, + { + "name": "model.layers.18.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 534528 + }, + { + "name": "model.layers.18.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 543744 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 552960 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 562176 + }, + { + "name": "model.layers.19.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 571392 + }, + { + "name": "model.layers.19.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 580608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 589824 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 599040 + }, + { + "name": "model.layers.20.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 608256 + }, + { + "name": "model.layers.20.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 617472 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 626688 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 635904 + }, + { + "name": "model.layers.21.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 645120 + }, + { + "name": "model.layers.21.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 654336 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 663552 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 672768 + }, + { + "name": "model.layers.22.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 681984 + }, + { + "name": "model.layers.22.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 691200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 700416 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 709632 + }, + { + "name": "model.layers.2.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 718848 + }, + { + "name": "model.layers.2.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 728064 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 737280 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 746496 + }, + { + "name": "model.layers.3.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 755712 + }, + { + "name": "model.layers.3.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 764928 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 774144 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 783360 + }, + { + "name": "model.layers.4.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 792576 + }, + { + "name": "model.layers.4.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 801792 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 811008 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 820224 + }, + { + "name": "model.layers.5.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 829440 + }, + { + "name": "model.layers.5.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 838656 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 847872 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 857088 + }, + { + "name": "model.layers.23.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 866304 + }, + { + "name": "model.layers.23.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 875520 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 884736 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 893952 + }, + { + "name": "model.layers.24.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 903168 + }, + { + "name": "model.layers.24.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 912384 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 921600 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 930816 + }, + { + "name": "model.layers.25.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 940032 + }, + { + "name": "model.layers.25.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 949248 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 958464 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 967680 + }, + { + "name": "model.layers.26.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 976896 + }, + { + "name": "model.layers.26.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 986112 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 995328 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1004544 + }, + { + "name": "model.layers.27.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1013760 + }, + { + "name": "model.layers.27.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1022976 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1032192 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1041408 + }, + { + "name": "model.layers.28.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1050624 + }, + { + "name": "model.layers.28.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1059840 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1069056 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1078272 + }, + { + "name": "model.layers.29.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1087488 + }, + { + "name": "model.layers.29.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1096704 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1105920 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1115136 + }, + { + "name": "model.layers.30.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1124352 + }, + { + "name": "model.layers.30.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1133568 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1142784 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1152000 + }, + { + "name": "model.layers.31.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1161216 + }, + { + "name": "model.layers.31.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1170432 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1179648 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1188864 + }, + { + "name": "model.layers.32.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1198080 + }, + { + "name": "model.layers.32.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1207296 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1216512 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1225728 + }, + { + "name": "model.layers.33.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1234944 + }, + { + "name": "model.layers.33.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1244160 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1253376 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1262592 + }, + { + "name": "model.layers.34.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1271808 + }, + { + "name": "model.layers.34.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1281024 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1290240 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1299456 + }, + { + "name": "model.layers.35.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1308672 + }, + { + "name": "model.layers.35.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1317888 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1327104 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1336320 + }, + { + "name": "model.layers.36.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1345536 + }, + { + "name": "model.layers.36.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1354752 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1363968 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1373184 + }, + { + "name": "model.layers.37.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1382400 + }, + { + "name": "model.layers.37.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1391616 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1400832 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1410048 + }, + { + "name": "model.layers.38.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1419264 + }, + { + "name": "model.layers.38.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1428480 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1437696 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1446912 + }, + { + "name": "model.layers.39.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1456128 + }, + { + "name": "model.layers.39.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1465344 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1474560 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1483776 + }, + { + "name": "model.layers.40.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1492992 + }, + { + "name": "model.layers.40.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1502208 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1511424 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1520640 + }, + { + "name": "model.layers.41.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1529856 + }, + { + "name": "model.layers.41.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1539072 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1548288 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1557504 + }, + { + "name": "model.layers.42.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1566720 + }, + { + "name": "model.layers.42.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1575936 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1585152 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1594368 + }, + { + "name": "model.layers.43.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1603584 + }, + { + "name": "model.layers.43.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1612800 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1622016 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1631232 + }, + { + "name": "model.layers.44.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1640448 + }, + { + "name": "model.layers.44.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1649664 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1658880 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1668096 + }, + { + "name": "model.layers.45.post_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1677312 + }, + { + "name": "model.layers.45.pre_feedforward_layernorm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1686528 + }, + { + "name": "model.norm.weight", + "shape": [ + 4608 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 9216, + "byteOffset": 1695744 + } + ], + "md5sum": "ac76064680a74df4c5cfd5b812c392d0" + } + ] +} \ No newline at end of file