|
{ |
|
"metadata": { |
|
"ParamSize": 370, |
|
"ParamBytes": 54454256640.0, |
|
"BitsPerParam": 14.686606395927337 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 2359296000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
256000, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 2359296000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc36e04174ceac3d7d23463b5d0d5181" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f0e853727e47613806ffa9c4c482ff2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21fc9a247c7e078674e21f55791089e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b6c03f580fe4119384785ef11ca4387" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a4c23fe136646c69827d68a51d94fff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b485dacf260f4fa5690ad5f9c056cdb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72d93a6fa7ce8ef45f7e92291de659a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "faebe2c0b094d27995cea1fb726b162a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7486ddadec0d8c5b2cdfbd015070d84" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a29efe9166112e96288dbca63318d224" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6c07888f84c4d7b9207c8af8812c8c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc641d45e6a9f09da1abeb52f359a23a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f04105260c6b8b5cea9222a47bc2eed2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c88be0bc9d4893fcd7ec78bc4190a4b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96bc024d0029894355a306ed1ca2c93a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eea52126cae035d5862119a6130f4b95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c35f2943c085189db06e8fa5fdefcac2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2f728f041fea40538f97df2a74b6a61" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53977f25830b1d83063393cf6e06e11b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d7c1b03ed5c22fd10e07f222ce40343" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2a9e7c74a08e7d4a740ef5cc1d16e1e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60f7265756557068a21f4a74a00e5065" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f08ac47e48250c696cf73e100725deb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f343ca21f549dfe46fc541759f83444" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c573cb4a07c2dbf1ba61c2bbdbe7da5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f953f21956b50b4bc9e2d1b584883141" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce383a398f02af492b0bc7b8eef97aaa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "06b9fa6b136e7cdc917af3a48d5f696a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb73d3cf5113e78ecc793631c659d8d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f0b10d5db85c567deb5d966759af0cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe0ae2bbb18c6e0179cbe97833c5cd6d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8bc0f6ed5fdff9f85e6ba0284f66b01" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c2719dbc5fd7513a948c92de098e927" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2380c8423932cc6b542b0574b83200c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "196e535102df782e482ca3147cc1c127" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d047b7e99b3dd4be9c7c7faa6798d5d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d4564c6ca52ada14b4630ca4be84c3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "874c1a06468f373519867ffcefb370cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4d9c2ee9c5e0103a51777f02ce8eb0c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0cae21a4e33d59c1ec4539eb13568d37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f50d6caf5e55da261262ef68a0d793d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a76dce23ab85b669e0f2928bbbf0867c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8480558f1f90dad78885e50b97d6c48" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b885b2200798b7cfd1879572c59352e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77f9abf4964a372bc69d2d3796e2589b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c59b3117c62bda38b842653d6ceb375d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02d1044d98322c2a291a82cebe0e7857" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "906edd6dcbe6278695ca0e848c69e583" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e9554edf16196c92858917406ea400a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08942848f84502d3a9872e335ca3fec2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc4db64c4ae40646745079d1a6c45b97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f77afd133ed31ce97d662c303318f8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "29aaabfda8a389edea789a19b10efd58" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "876710b458576180c5bdd37723a12cff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edf947b8a8028c4e7278f496746a3d69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65a0be861a8dc6fdcf2e7c2353637e3f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7c84cdbd8fcbb6624d9bb17252cd1e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a1f6df7d173c58177b6ff49e44b6f60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f7c9d760ed57a0a963a975739bceba8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50c66672f6e350aa32133092f67e2325" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81798ecdb577f945f643e4e41d674c2c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce6aaf2fd04fdf82e550b9c3b3b0a986" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "697236882302cf7618eb6a231156af19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccd5b13f6da68b770003bae7acd90081" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87f8c1eb1c0635f3c6a45fdedfdab1c0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0839d1f19482eaea044426792f2abad2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b1f469791c6f561d7ab5378b3c573b68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad5e4eacb1548fdc4b2fa0c884cd2b5b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "056e0b725496bb0a1dee4375e1f60900" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00e7aea26a88ccb6e27fc43ee0b596c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "632e2a9805f500bd67b9c8b46075ec96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a19d25593152024db2cdb58653521249" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1521585a47d49646f32f00cfeb28f13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7b5cf942e2ad27cb31b3d86bb934740" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc20a1b9ce0d50def914fa3283740755" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "181b6f0f812244ed28f845443394c2c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccc07720caf4878f8ab28a00247574eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e880ad3a769101bf5d42b3b2bd30428" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae6d7000e8302c78c975881680afb875" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0f8c0b3791755ffcb3e2bab3629b32e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c6a1621ea851755cc1c676c40ba8396c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ed9f65c0b647b2794a456560a2a22e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "58e2d4f9838fd2ddab6ce01a60a1c025" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6b70b5bafedb22e5e6355741eb50fa3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3093327e75aa189e4c1d62964f796cfb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc87c5e22378d6e9057ffeee62b139ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2d7be78a34ea444d640f252063277c3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7d86ef778cd55b79b4df50d4c34e8deb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a65d46a8dca4899ac05f0a2e382c3dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a60ab57c6d1ce3da7030047c7b491e70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f27c1dc563fd7050411badbbd667e1c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73b24ad60ab31462446ce6602fa1a44e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e831fef8e15fdf98ea271c1f80748476" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1d2b99785dc024c66e656607780a26d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "471f65297ef8e62ff152af3fbbcb29af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f02c1eff8688c1446ed046a4e94d294" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "299eec1f29838a1619f651b8ac2c2d2a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48ee8aa14b6bedbf74ea626ccc6b66a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d8c70a2fc2694d951a55a7614f2ee8f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2c72b1c908d4fe944fdab3339eaf781" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15c479aacfa31f1227b563789686152d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "24de14c07f42cb9d7142f9085470c02a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "561db5e2c4e9b20a00497d459e65fb0f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a8a3aec61c5bef5e6e7942fc2ba31b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4b800d82bb141c8a3f0531e7dc8362f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f1d312f5a8aff7f31d45f49dba6b47a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a68eb722f2505b2775c3b27591c531d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e82c8779b7d5498268a303a1fe33c42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d196249f35fa960330bf61163eaf14e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4915ebff7e72c5278a55d0525ae23bcf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "804a60e642f3dad08c7fcafd2637a225" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "55b2212a596e3f28e9ef49f4e1d58f85" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "04ce72e3c3f6f7aa3771e2c9941f9092" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "99482aea32b7ab638410a48d1e389f61" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b0e38e30df4691211db0690a94039541" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "628902c3c3c8e71990dccd546e73079b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0249fa2584c4e66e6217d6b4f2aa673c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e7ded135f939f346bdf016d8457982e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d33319299cd0ce8206cb31d488a0d57f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69703b0f869124b81aa722f5455e240a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "535ca2d671d58d7416a81e4843331b09" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b2c8133ed20173bab22eedac23723c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b47ca8b0c1b07aa6165cb3a6f9aee351" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "05d454a922c916dadf49c4c393f330fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63856a22d3fe94c5e7b826a664d8fe16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f189245c12adcdbabb594dfd63004834" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d783d0c7f7ce0d9ab67ca8cc88683a72" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37b47716e6cab80d51d2ef22da02423a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "93136414d590df68e40cd9b7c8d54642" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8cebf3bf42c0be0fe4615e091045e5f1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4a7221be2b3875d63bc943db6c7e9ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51482b34c93adf5591e4d20eb2fe9cc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a938aa19af2ac5e1a7be4a9d62afbb4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48de06f22b7d9f7e1062969b08f59bee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3cd145b1d52e5a789bacd5852d757cf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c5e75c19618091f7cc441e82cacdb178" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a9328660ee78726d415ecedb73a05595" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b5400774e11eb7479e59682fcf48ca2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a592b06aeec1f5c5e9b8fed80352647e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "550804e4f5d0aa2c564f8a6d370914af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a535254722180c0a0e340615721be681" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d19ca4e5552327dbf359da82fff07851" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d84a53918f7caa3fc1d55e90c5ff3063" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "75ff61a4d93ae52a38cee962586e09d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b3d7f04e78cdea4fd912782085059fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f77cea9990c9b182caaf01fc65731aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf682f566b1f0f005d98b2165f706985" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6e31dad96223a2203e3abc4aeab63b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f57d8dc2aeace971196913bdb3bb20a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "43f05564b30a41d16b74a9a2a58461c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15ef6e151987113b53eb6b8bf553cb93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba6c983dcd13cb7ebe67393fddb18ebd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89b326f0bbc270c5d848211b33f75282" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d6c98526eeecee414755d1b9eb218c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "efc27d1bfeeae5a55d187023d814fda7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "89d55c72ca5f2455acdf2a47c3c206fd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53463953a04a04caaa844b11a083b559" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2f625fccb46df5fe8783092cf1f38b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e18e425f8893842443b76e0f065048f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2d936649571d6961692ddc69d0a5db70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc34cc6fe4a37feac389b36fe18dd2d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "03831ad4f8f9ae173b2f5c7d07dafa1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c8e9e9fc07c29838d7c8f03606f6867b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e11442984dc3747db6f25e2bfbfd5d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b4b772980ae3f6c831e1cdbcb29d787" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ad9afaadf62bf00e595cdd08e203696" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "892bc0ed228f6ffee8747838ff18ef18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7eff25eec698c74beb6c2e815a5c979" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80d03d9a7dd3d5cdc45175f715abc76f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35370a4adb178622ed43e4d56c5c934e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "368303116313404fd25eed9653c855d7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47d4437856896ed12b829a78bed5a6eb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5adebc49547de2e6f7bb6851b03f028a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e6dcc7e7418af3d7fb92d5aa42c4d08" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "432b8c18203509aacdd770908d566e7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f0c116284eb0dc45ef46dad7746aa1c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6ba3f7b643bec7f8446f04217afb0b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e398958a8be1e10a8478d1d2e15fb14c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0c38211e4f037b8d63443123ec8d8496" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c58f41bfdbd0c5d432dd75ae2225c817" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1fa5a61b58c9f02732d4e1463d092599" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 679477248, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
73728, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 679477248, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68b213f87a3df7334432a82dfd68ca8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 75497472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
8192, |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 75497472, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2b47a8861441bdf53f9dac6ce02980e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 37748736, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.weight", |
|
"shape": [ |
|
4608, |
|
4096 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 37748736, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21bf969aca87c0a3b40035d5cddf4b68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 339738624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.weight", |
|
"shape": [ |
|
4608, |
|
36864 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 339738624, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbd18d3a38c3bad9a0b9198a90fa9760" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1704960, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 9216 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 18432 |
|
}, |
|
{ |
|
"name": "model.layers.0.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 27648 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 36864 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 46080 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 55296 |
|
}, |
|
{ |
|
"name": "model.layers.1.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 64512 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 73728 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 82944 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 92160 |
|
}, |
|
{ |
|
"name": "model.layers.10.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 101376 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 110592 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 119808 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 129024 |
|
}, |
|
{ |
|
"name": "model.layers.11.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 138240 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 147456 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 156672 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 165888 |
|
}, |
|
{ |
|
"name": "model.layers.12.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 175104 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 184320 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 193536 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 202752 |
|
}, |
|
{ |
|
"name": "model.layers.13.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 211968 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 221184 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 230400 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 239616 |
|
}, |
|
{ |
|
"name": "model.layers.14.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 248832 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 258048 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 267264 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 276480 |
|
}, |
|
{ |
|
"name": "model.layers.6.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 285696 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 294912 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 304128 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 313344 |
|
}, |
|
{ |
|
"name": "model.layers.7.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 322560 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 331776 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 340992 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 350208 |
|
}, |
|
{ |
|
"name": "model.layers.8.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 359424 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 368640 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 377856 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 387072 |
|
}, |
|
{ |
|
"name": "model.layers.9.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 396288 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 405504 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 414720 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 423936 |
|
}, |
|
{ |
|
"name": "model.layers.15.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 433152 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 442368 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 451584 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 460800 |
|
}, |
|
{ |
|
"name": "model.layers.16.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 470016 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 479232 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 488448 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 497664 |
|
}, |
|
{ |
|
"name": "model.layers.17.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 506880 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 516096 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 525312 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 534528 |
|
}, |
|
{ |
|
"name": "model.layers.18.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 543744 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 552960 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 562176 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 571392 |
|
}, |
|
{ |
|
"name": "model.layers.19.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 580608 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 589824 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 599040 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 608256 |
|
}, |
|
{ |
|
"name": "model.layers.20.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 617472 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 626688 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 635904 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 645120 |
|
}, |
|
{ |
|
"name": "model.layers.21.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 654336 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 663552 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 672768 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 681984 |
|
}, |
|
{ |
|
"name": "model.layers.22.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 691200 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 700416 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 709632 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 718848 |
|
}, |
|
{ |
|
"name": "model.layers.2.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 728064 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 737280 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 746496 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 755712 |
|
}, |
|
{ |
|
"name": "model.layers.3.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 764928 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 774144 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 783360 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 792576 |
|
}, |
|
{ |
|
"name": "model.layers.4.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 801792 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 811008 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 820224 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 829440 |
|
}, |
|
{ |
|
"name": "model.layers.5.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 838656 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 847872 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 857088 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 866304 |
|
}, |
|
{ |
|
"name": "model.layers.23.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 875520 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 884736 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 893952 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 903168 |
|
}, |
|
{ |
|
"name": "model.layers.24.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 912384 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 921600 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 930816 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 940032 |
|
}, |
|
{ |
|
"name": "model.layers.25.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 949248 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 958464 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 967680 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 976896 |
|
}, |
|
{ |
|
"name": "model.layers.26.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 986112 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 995328 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1004544 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1013760 |
|
}, |
|
{ |
|
"name": "model.layers.27.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1022976 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1032192 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1041408 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1050624 |
|
}, |
|
{ |
|
"name": "model.layers.28.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1059840 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1069056 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1078272 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1087488 |
|
}, |
|
{ |
|
"name": "model.layers.29.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1096704 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1105920 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1115136 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1124352 |
|
}, |
|
{ |
|
"name": "model.layers.30.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1133568 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1142784 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1152000 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1161216 |
|
}, |
|
{ |
|
"name": "model.layers.31.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1170432 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1179648 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1188864 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1198080 |
|
}, |
|
{ |
|
"name": "model.layers.32.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1207296 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1216512 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1225728 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1234944 |
|
}, |
|
{ |
|
"name": "model.layers.33.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1244160 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1253376 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1262592 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1271808 |
|
}, |
|
{ |
|
"name": "model.layers.34.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1281024 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1290240 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1299456 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1308672 |
|
}, |
|
{ |
|
"name": "model.layers.35.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1317888 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1327104 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1336320 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1345536 |
|
}, |
|
{ |
|
"name": "model.layers.36.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1354752 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1363968 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1373184 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1382400 |
|
}, |
|
{ |
|
"name": "model.layers.37.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1391616 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1400832 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1410048 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1419264 |
|
}, |
|
{ |
|
"name": "model.layers.38.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1428480 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1437696 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1446912 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1456128 |
|
}, |
|
{ |
|
"name": "model.layers.39.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1465344 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1474560 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1483776 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1492992 |
|
}, |
|
{ |
|
"name": "model.layers.40.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1502208 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1511424 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1520640 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1529856 |
|
}, |
|
{ |
|
"name": "model.layers.41.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1539072 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1548288 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1557504 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1566720 |
|
}, |
|
{ |
|
"name": "model.layers.42.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1575936 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1585152 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1594368 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1603584 |
|
}, |
|
{ |
|
"name": "model.layers.43.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1612800 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1622016 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1631232 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1640448 |
|
}, |
|
{ |
|
"name": "model.layers.44.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1649664 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1658880 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1668096 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1677312 |
|
}, |
|
{ |
|
"name": "model.layers.45.pre_feedforward_layernorm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1686528 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 1695744 |
|
} |
|
], |
|
"md5sum": "ac76064680a74df4c5cfd5b812c392d0" |
|
} |
|
] |
|
} |