|
{ |
|
"metadata": { |
|
"ParamSize": 147, |
|
"ParamBytes": 3778220032.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13620811167b805f8ea2a1d99fbd406c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "562607cf49908cc655f991e0d55bcfab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "cdef617556087435d51959e9e9ed610e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "439c2ca4b319f35142265f6f82c5d302" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7d65f1da58141a06abbe4b13a0940dee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "c43d939dc697ae883fa557c3a6acebfd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a3897ce16b64b2952d540145eb9a0285" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7feea0d894294bebc16db150da47e608" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "06fd9fccc1cc527bdcdc0604ac15d86f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9078eec7783ca112e36c3a2684e1a867" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6fa373eb1a0079b814769b897f0d07c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "55fbc26500896d2e10b7e7db5c83c0a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "065d4381d53a30b79b19a0179d9c4a45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
} |
|
], |
|
"md5sum": "ebf9b884f5693236428d94869c969adc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7613846d39ce5f62b2f1dd30a9746a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a194ceac00e1e59af2d7127b3c8cf2b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "133438c2ed5e05aa25d9b0722be6ea20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "424adead5dbf5c309417525896bc5577" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbf2d36b86b2c09a93a4a1c9840dd781" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "9841633cf8016dc1eb4bc2961a3f94aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47f542e9709957aa8af72faa952d486f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de5cc4de49eb2939d747f591db5fd452" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "e8a0aced0a7edbb64191d23f342bb81d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f5e3eca294b217de661233d3904c69d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6f3b1f080fbc31c85636b8b542c1cc4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "ec22329c2b39d676c61b4f7a13ff1955" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6aa83a5244c843e8abdf7eaa4bc5c3d2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c46bb790229cdd669d265e0ed1b1bdc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "39521843a30d216fd8e46c677bc07eee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79bf5afe398fcdad7933e47b6fb5b76c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d53d9f9d79dc2d8e80e0bc1307df84d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "579d79fff40956baa246862fe41f8f68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "872c26bf62854b30b5ceca2b0b2e133f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9d9ce49d43fe40d22fbfea0b6189f699" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "371871bcf96a27dfa0110a0c1adaca7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e200f5c650647936f0e504ed468e200" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64f3c7ab0cef1b04cbbf507f585c4f1e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 379060224, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.weight", |
|
"shape": [ |
|
92544, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 379060224, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22a60526a41a1ec740a36e930ab628ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d67ebba07546dfac1203b260fc1d96c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25182208, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25174016 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25178112 |
|
} |
|
], |
|
"md5sum": "fea60b0b5b39f57a50b5f79fd5db77f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7336040e0332518e23a109a77622c95f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1bd18ed0fe66988af08bd9a3678123f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "81fc643e24286d745d9d3e9db68fcce7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71c1416f7db0cba954b133615bff9451" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfa4ae9fb4158eca19611a07d3b249a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "88e34648f33e99e5d74a34a259ba97a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3678f7b6adcb837bb5471013aa8a677e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5fcd3ce8a74f83581106483947f5a39f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "5d3f1ee1d9118aeb8d3a4a6dbd1d1a80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f030e01a341f26c32ef40cf8e2e56671" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aca438bd5c0a97326bc9cb4f40ba127c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "f1e82431d8dc0005139f5c588055e091" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "acdaf4671c25f9d0968de5dd7f328fa0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f98fa680df4cd9396b080e39ba7ac4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "1e1fdcea5051f6d03ba5d07a6e05ad10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9439edabf330d64ea237c65ddfc0e7c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08447849f75fe55e869edb54075b9684" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "8235ed3a474bada5c929bfe4a1a1b998" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b0125004e88f868ac4ead47da62eee9c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "871462f1f4a3befbf8e843f144da9a97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "18f6ca2e5e8140418016df65d6f70dd8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d1b94d7fd217eec8644f1b82d0925bb3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef05f26662406aaa151ee96f14560d3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "1bb51ea71c1ab50a84ab2f95c2c63175" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c30634533c2fc59d575615fa4ac9214b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c123a4af918d13adf4a76d66c23f8500" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "6554be6c98a0dc256082e9152d1f0e21" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2afc907981b853485b4978d87f9024ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe186966e79790cc22bbeefbc75af8dd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25174016, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
} |
|
], |
|
"md5sum": "4c76fe74edbf769084ab1171ed00266f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 67108864, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.weight", |
|
"shape": [ |
|
16384, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 67108864, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6440fb6e14258b7f4dea6dc77b8262a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.weight", |
|
"shape": [ |
|
2048, |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc5c095b532c915b19d5718c3e6bfe4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 379060224, |
|
"records": [ |
|
{ |
|
"name": "output.weight", |
|
"shape": [ |
|
92544, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 379060224, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a5b778eef7ec6da1957b35e1888c3333" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25178112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wo.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.weight", |
|
"shape": [ |
|
4096, |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16777216, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25165824 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25169920 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 25174016 |
|
} |
|
], |
|
"md5sum": "5735df772a182e0db0a8ac4b6937351a" |
|
} |
|
] |
|
} |