{ "metadata": { "ParamSize": 370, "ParamBytes": 54454256640.0, "BitsPerParam": 14.686606395927337 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2359296000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 256000, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2359296000, "byteOffset": 0 } ], "md5sum": "dc36e04174ceac3d7d23463b5d0d5181" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "4f0e853727e47613806ffa9c4c482ff2" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "21fc9a247c7e078674e21f55791089e3" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "7b6c03f580fe4119384785ef11ca4387" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "0a4c23fe136646c69827d68a51d94fff" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "6b485dacf260f4fa5690ad5f9c056cdb" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "72d93a6fa7ce8ef45f7e92291de659a7" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "faebe2c0b094d27995cea1fb726b162a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "b7486ddadec0d8c5b2cdfbd015070d84" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a29efe9166112e96288dbca63318d224" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d6c07888f84c4d7b9207c8af8812c8c8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "cc641d45e6a9f09da1abeb52f359a23a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "f04105260c6b8b5cea9222a47bc2eed2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "c88be0bc9d4893fcd7ec78bc4190a4b5" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "96bc024d0029894355a306ed1ca2c93a" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "eea52126cae035d5862119a6130f4b95" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "c35f2943c085189db06e8fa5fdefcac2" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "e2f728f041fea40538f97df2a74b6a61" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "53977f25830b1d83063393cf6e06e11b" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "1d7c1b03ed5c22fd10e07f222ce40343" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "2a9e7c74a08e7d4a740ef5cc1d16e1e4" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "60f7265756557068a21f4a74a00e5065" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2f08ac47e48250c696cf73e100725deb" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "0f343ca21f549dfe46fc541759f83444" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "3c573cb4a07c2dbf1ba61c2bbdbe7da5" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "f953f21956b50b4bc9e2d1b584883141" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ce383a398f02af492b0bc7b8eef97aaa" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "06b9fa6b136e7cdc917af3a48d5f696a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "fb73d3cf5113e78ecc793631c659d8d4" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "6f0b10d5db85c567deb5d966759af0cd" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "fe0ae2bbb18c6e0179cbe97833c5cd6d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e8bc0f6ed5fdff9f85e6ba0284f66b01" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "8c2719dbc5fd7513a948c92de098e927" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "2380c8423932cc6b542b0574b83200c2" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "196e535102df782e482ca3147cc1c127" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "d047b7e99b3dd4be9c7c7faa6798d5d0" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "9d4564c6ca52ada14b4630ca4be84c3b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "874c1a06468f373519867ffcefb370cc" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "e4d9c2ee9c5e0103a51777f02ce8eb0c" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "0cae21a4e33d59c1ec4539eb13568d37" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "7f50d6caf5e55da261262ef68a0d793d" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a76dce23ab85b669e0f2928bbbf0867c" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "c8480558f1f90dad78885e50b97d6c48" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "1b885b2200798b7cfd1879572c59352e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "77f9abf4964a372bc69d2d3796e2589b" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c59b3117c62bda38b842653d6ceb375d" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "02d1044d98322c2a291a82cebe0e7857" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "906edd6dcbe6278695ca0e848c69e583" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "5e9554edf16196c92858917406ea400a" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "08942848f84502d3a9872e335ca3fec2" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "dc4db64c4ae40646745079d1a6c45b97" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "4f77afd133ed31ce97d662c303318f8b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "29aaabfda8a389edea789a19b10efd58" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "876710b458576180c5bdd37723a12cff" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "edf947b8a8028c4e7278f496746a3d69" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "65a0be861a8dc6fdcf2e7c2353637e3f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "f7c84cdbd8fcbb6624d9bb17252cd1e2" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "8a1f6df7d173c58177b6ff49e44b6f60" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "2f7c9d760ed57a0a963a975739bceba8" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "50c66672f6e350aa32133092f67e2325" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "81798ecdb577f945f643e4e41d674c2c" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "ce6aaf2fd04fdf82e550b9c3b3b0a986" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "697236882302cf7618eb6a231156af19" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "ccd5b13f6da68b770003bae7acd90081" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "87f8c1eb1c0635f3c6a45fdedfdab1c0" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "0839d1f19482eaea044426792f2abad2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "b1f469791c6f561d7ab5378b3c573b68" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ad5e4eacb1548fdc4b2fa0c884cd2b5b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "056e0b725496bb0a1dee4375e1f60900" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "00e7aea26a88ccb6e27fc43ee0b596c6" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "632e2a9805f500bd67b9c8b46075ec96" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "a19d25593152024db2cdb58653521249" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "e1521585a47d49646f32f00cfeb28f13" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "f7b5cf942e2ad27cb31b3d86bb934740" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "bc20a1b9ce0d50def914fa3283740755" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "181b6f0f812244ed28f845443394c2c3" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "ccc07720caf4878f8ab28a00247574eb" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "6e880ad3a769101bf5d42b3b2bd30428" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ae6d7000e8302c78c975881680afb875" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "f0f8c0b3791755ffcb3e2bab3629b32e" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "c6a1621ea851755cc1c676c40ba8396c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "8ed9f65c0b647b2794a456560a2a22e6" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "58e2d4f9838fd2ddab6ce01a60a1c025" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "a6b70b5bafedb22e5e6355741eb50fa3" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3093327e75aa189e4c1d62964f796cfb" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "fc87c5e22378d6e9057ffeee62b139ff" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "2d7be78a34ea444d640f252063277c3e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "7d86ef778cd55b79b4df50d4c34e8deb" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "3a65d46a8dca4899ac05f0a2e382c3dd" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a60ab57c6d1ce3da7030047c7b491e70" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "f27c1dc563fd7050411badbbd667e1c5" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "73b24ad60ab31462446ce6602fa1a44e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "e831fef8e15fdf98ea271c1f80748476" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "f1d2b99785dc024c66e656607780a26d" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "471f65297ef8e62ff152af3fbbcb29af" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "9f02c1eff8688c1446ed046a4e94d294" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "299eec1f29838a1619f651b8ac2c2d2a" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "48ee8aa14b6bedbf74ea626ccc6b66a9" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "1d8c70a2fc2694d951a55a7614f2ee8f" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "f2c72b1c908d4fe944fdab3339eaf781" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "15c479aacfa31f1227b563789686152d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "24de14c07f42cb9d7142f9085470c02a" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "561db5e2c4e9b20a00497d459e65fb0f" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1a8a3aec61c5bef5e6e7942fc2ba31b0" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d4b800d82bb141c8a3f0531e7dc8362f" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "5f1d312f5a8aff7f31d45f49dba6b47a" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "a68eb722f2505b2775c3b27591c531d3" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2e82c8779b7d5498268a303a1fe33c42" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "d196249f35fa960330bf61163eaf14e6" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "4915ebff7e72c5278a55d0525ae23bcf" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "804a60e642f3dad08c7fcafd2637a225" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "55b2212a596e3f28e9ef49f4e1d58f85" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "04ce72e3c3f6f7aa3771e2c9941f9092" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "99482aea32b7ab638410a48d1e389f61" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "b0e38e30df4691211db0690a94039541" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "628902c3c3c8e71990dccd546e73079b" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "0249fa2584c4e66e6217d6b4f2aa673c" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "7e7ded135f939f346bdf016d8457982e" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "d33319299cd0ce8206cb31d488a0d57f" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "69703b0f869124b81aa722f5455e240a" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "535ca2d671d58d7416a81e4843331b09" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "1b2c8133ed20173bab22eedac23723c2" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "b47ca8b0c1b07aa6165cb3a6f9aee351" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "05d454a922c916dadf49c4c393f330fd" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "63856a22d3fe94c5e7b826a664d8fe16" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "f189245c12adcdbabb594dfd63004834" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "d783d0c7f7ce0d9ab67ca8cc88683a72" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "37b47716e6cab80d51d2ef22da02423a" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "93136414d590df68e40cd9b7c8d54642" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "8cebf3bf42c0be0fe4615e091045e5f1" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "d4a7221be2b3875d63bc943db6c7e9ea" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "51482b34c93adf5591e4d20eb2fe9cc5" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "a938aa19af2ac5e1a7be4a9d62afbb4a" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "48de06f22b7d9f7e1062969b08f59bee" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "c3cd145b1d52e5a789bacd5852d757cf" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c5e75c19618091f7cc441e82cacdb178" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a9328660ee78726d415ecedb73a05595" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "b5400774e11eb7479e59682fcf48ca2f" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "a592b06aeec1f5c5e9b8fed80352647e" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "550804e4f5d0aa2c564f8a6d370914af" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "a535254722180c0a0e340615721be681" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "d19ca4e5552327dbf359da82fff07851" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "d84a53918f7caa3fc1d55e90c5ff3063" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "75ff61a4d93ae52a38cee962586e09d4" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "4b3d7f04e78cdea4fd912782085059fe" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "2f77cea9990c9b182caaf01fc65731aa" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "bf682f566b1f0f005d98b2165f706985" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "f6e31dad96223a2203e3abc4aeab63b6" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "f57d8dc2aeace971196913bdb3bb20a7" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "43f05564b30a41d16b74a9a2a58461c5" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "15ef6e151987113b53eb6b8bf553cb93" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "ba6c983dcd13cb7ebe67393fddb18ebd" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "89b326f0bbc270c5d848211b33f75282" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "6d6c98526eeecee414755d1b9eb218c2" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "efc27d1bfeeae5a55d187023d814fda7" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "89d55c72ca5f2455acdf2a47c3c206fd" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "53463953a04a04caaa844b11a083b559" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "e2f625fccb46df5fe8783092cf1f38b8" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "4e18e425f8893842443b76e0f065048f" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2d936649571d6961692ddc69d0a5db70" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "bc34cc6fe4a37feac389b36fe18dd2d0" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "03831ad4f8f9ae173b2f5c7d07dafa1a" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "c8e9e9fc07c29838d7c8f03606f6867b" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1e11442984dc3747db6f25e2bfbfd5d2" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "6b4b772980ae3f6c831e1cdbcb29d787" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "1ad9afaadf62bf00e595cdd08e203696" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "892bc0ed228f6ffee8747838ff18ef18" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "b7eff25eec698c74beb6c2e815a5c979" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "80d03d9a7dd3d5cdc45175f715abc76f" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "35370a4adb178622ed43e4d56c5c934e" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "368303116313404fd25eed9653c855d7" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "47d4437856896ed12b829a78bed5a6eb" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "5adebc49547de2e6f7bb6851b03f028a" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "2e6dcc7e7418af3d7fb92d5aa42c4d08" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "432b8c18203509aacdd770908d566e7e" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "6f0c116284eb0dc45ef46dad7746aa1c" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "f6ba3f7b643bec7f8446f04217afb0b9" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "e398958a8be1e10a8478d1d2e15fb14c" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "0c38211e4f037b8d63443123ec8d8496" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "c58f41bfdbd0c5d432dd75ae2225c817" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "1fa5a61b58c9f02732d4e1463d092599" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 679477248, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 73728, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 679477248, "byteOffset": 0 } ], "md5sum": "68b213f87a3df7334432a82dfd68ca8b" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 75497472, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.weight", "shape": [ 8192, 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 75497472, "byteOffset": 0 } ], "md5sum": "2b47a8861441bdf53f9dac6ce02980e3" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 37748736, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 4608, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 37748736, "byteOffset": 0 } ], "md5sum": "21bf969aca87c0a3b40035d5cddf4b68" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 339738624, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 4608, 36864 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 339738624, "byteOffset": 0 } ], "md5sum": "dbd18d3a38c3bad9a0b9198a90fa9760" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 1704960, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 9216 }, { "name": "model.layers.0.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 18432 }, { "name": "model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 27648 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 36864 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 46080 }, { "name": "model.layers.1.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 55296 }, { "name": "model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 64512 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 73728 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 82944 }, { "name": "model.layers.10.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 92160 }, { "name": "model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 101376 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 110592 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 119808 }, { "name": "model.layers.11.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 129024 }, { "name": "model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 138240 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 147456 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 156672 }, { "name": "model.layers.12.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 165888 }, { "name": "model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 175104 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 184320 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 193536 }, { "name": "model.layers.13.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 202752 }, { "name": "model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 211968 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 221184 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 230400 }, { "name": "model.layers.14.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 239616 }, { "name": "model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 248832 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 258048 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 267264 }, { "name": "model.layers.6.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 276480 }, { "name": "model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 285696 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 294912 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 304128 }, { "name": "model.layers.7.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 313344 }, { "name": "model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 322560 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 331776 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 340992 }, { "name": "model.layers.8.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 350208 }, { "name": "model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 359424 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 368640 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 377856 }, { "name": "model.layers.9.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 387072 }, { "name": "model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 396288 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 405504 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 414720 }, { "name": "model.layers.15.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 423936 }, { "name": "model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 433152 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 442368 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 451584 }, { "name": "model.layers.16.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 460800 }, { "name": "model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 470016 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 479232 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 488448 }, { "name": "model.layers.17.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 497664 }, { "name": "model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 506880 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 516096 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 525312 }, { "name": "model.layers.18.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 534528 }, { "name": "model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 543744 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 552960 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 562176 }, { "name": "model.layers.19.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 571392 }, { "name": "model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 580608 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 589824 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 599040 }, { "name": "model.layers.20.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 608256 }, { "name": "model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 617472 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 626688 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 635904 }, { "name": "model.layers.21.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 645120 }, { "name": "model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 654336 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 663552 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 672768 }, { "name": "model.layers.22.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 681984 }, { "name": "model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 691200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 700416 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 709632 }, { "name": "model.layers.2.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 718848 }, { "name": "model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 728064 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 737280 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 746496 }, { "name": "model.layers.3.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 755712 }, { "name": "model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 764928 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 774144 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 783360 }, { "name": "model.layers.4.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 792576 }, { "name": "model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 801792 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 811008 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 820224 }, { "name": "model.layers.5.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 829440 }, { "name": "model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 838656 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 847872 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 857088 }, { "name": "model.layers.23.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 866304 }, { "name": "model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 875520 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 884736 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 893952 }, { "name": "model.layers.24.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 903168 }, { "name": "model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 912384 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 921600 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 930816 }, { "name": "model.layers.25.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 940032 }, { "name": "model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 949248 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 958464 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 967680 }, { "name": "model.layers.26.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 976896 }, { "name": "model.layers.26.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 986112 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 995328 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1004544 }, { "name": "model.layers.27.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1013760 }, { "name": "model.layers.27.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1022976 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1032192 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1041408 }, { "name": "model.layers.28.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1050624 }, { "name": "model.layers.28.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1059840 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1069056 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1078272 }, { "name": "model.layers.29.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1087488 }, { "name": "model.layers.29.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1096704 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1105920 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1115136 }, { "name": "model.layers.30.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1124352 }, { "name": "model.layers.30.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1133568 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1142784 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1152000 }, { "name": "model.layers.31.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1161216 }, { "name": "model.layers.31.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1170432 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1179648 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1188864 }, { "name": "model.layers.32.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1198080 }, { "name": "model.layers.32.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1207296 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1216512 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1225728 }, { "name": "model.layers.33.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1234944 }, { "name": "model.layers.33.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1244160 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1253376 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1262592 }, { "name": "model.layers.34.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1271808 }, { "name": "model.layers.34.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1281024 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1290240 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1299456 }, { "name": "model.layers.35.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1308672 }, { "name": "model.layers.35.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1317888 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1327104 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1336320 }, { "name": "model.layers.36.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1345536 }, { "name": "model.layers.36.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1354752 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1363968 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1373184 }, { "name": "model.layers.37.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1382400 }, { "name": "model.layers.37.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1391616 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1400832 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1410048 }, { "name": "model.layers.38.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1419264 }, { "name": "model.layers.38.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1428480 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1437696 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1446912 }, { "name": "model.layers.39.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1456128 }, { "name": "model.layers.39.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1465344 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1474560 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1483776 }, { "name": "model.layers.40.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1492992 }, { "name": "model.layers.40.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1502208 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1511424 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1520640 }, { "name": "model.layers.41.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1529856 }, { "name": "model.layers.41.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1539072 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1548288 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1557504 }, { "name": "model.layers.42.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1566720 }, { "name": "model.layers.42.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1575936 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1585152 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1594368 }, { "name": "model.layers.43.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1603584 }, { "name": "model.layers.43.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1612800 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1622016 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1631232 }, { "name": "model.layers.44.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1640448 }, { "name": "model.layers.44.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1649664 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1658880 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1668096 }, { "name": "model.layers.45.post_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1677312 }, { "name": "model.layers.45.pre_feedforward_layernorm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1686528 }, { "name": "model.norm.weight", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 1695744 } ], "md5sum": "ac76064680a74df4c5cfd5b812c392d0" } ] }