{ "metadata": { "ParamSize": 147, "ParamBytes": 3778220032.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "13620811167b805f8ea2a1d99fbd406c" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "562607cf49908cc655f991e0d55bcfab" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.0.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.0.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.0.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "cdef617556087435d51959e9e9ed610e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "439c2ca4b319f35142265f6f82c5d302" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7d65f1da58141a06abbe4b13a0940dee" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.1.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "c43d939dc697ae883fa557c3a6acebfd" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a3897ce16b64b2952d540145eb9a0285" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7feea0d894294bebc16db150da47e608" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.10.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.10.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "06fd9fccc1cc527bdcdc0604ac15d86f" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9078eec7783ca112e36c3a2684e1a867" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a6fa373eb1a0079b814769b897f0d07c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.11.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "55fbc26500896d2e10b7e7db5c83c0a5" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "065d4381d53a30b79b19a0179d9c4a45" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.12.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 } ], "md5sum": "ebf9b884f5693236428d94869c969adc" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b7613846d39ce5f62b2f1dd30a9746a6" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0a194ceac00e1e59af2d7127b3c8cf2b" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.2.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.2.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "133438c2ed5e05aa25d9b0722be6ea20" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "424adead5dbf5c309417525896bc5577" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fbf2d36b86b2c09a93a4a1c9840dd781" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.3.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "9841633cf8016dc1eb4bc2961a3f94aa" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "47f542e9709957aa8af72faa952d486f" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "de5cc4de49eb2939d747f591db5fd452" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.4.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "e8a0aced0a7edbb64191d23f342bb81d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8f5e3eca294b217de661233d3904c69d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6f3b1f080fbc31c85636b8b542c1cc4" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.5.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "ec22329c2b39d676c61b4f7a13ff1955" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6aa83a5244c843e8abdf7eaa4bc5c3d2" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9c46bb790229cdd669d265e0ed1b1bdc" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.6.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "39521843a30d216fd8e46c677bc07eee" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "79bf5afe398fcdad7933e47b6fb5b76c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6d53d9f9d79dc2d8e80e0bc1307df84d" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.7.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "579d79fff40956baa246862fe41f8f68" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "872c26bf62854b30b5ceca2b0b2e133f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9d9ce49d43fe40d22fbfea0b6189f699" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.8.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "371871bcf96a27dfa0110a0c1adaca7a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "1e200f5c650647936f0e504ed468e200" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64f3c7ab0cef1b04cbbf507f585c4f1e" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 379060224, "records": [ { "name": "model.tok_embeddings.weight", "shape": [ 92544, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 379060224, "byteOffset": 0 } ], "md5sum": "22a60526a41a1ec740a36e930ab628ea" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d67ebba07546dfac1203b260fc1d96c4" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25182208, "records": [ { "name": "model.layers.9.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25174016 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25178112 } ], "md5sum": "fea60b0b5b39f57a50b5f79fd5db77f5" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7336040e0332518e23a109a77622c95f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1bd18ed0fe66988af08bd9a3678123f6" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.13.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "81fc643e24286d745d9d3e9db68fcce7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "71c1416f7db0cba954b133615bff9451" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dfa4ae9fb4158eca19611a07d3b249a5" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.14.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.14.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "88e34648f33e99e5d74a34a259ba97a0" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3678f7b6adcb837bb5471013aa8a677e" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5fcd3ce8a74f83581106483947f5a39f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.15.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "5d3f1ee1d9118aeb8d3a4a6dbd1d1a80" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f030e01a341f26c32ef40cf8e2e56671" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "aca438bd5c0a97326bc9cb4f40ba127c" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.16.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.16.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "f1e82431d8dc0005139f5c588055e091" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "acdaf4671c25f9d0968de5dd7f328fa0" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2f98fa680df4cd9396b080e39ba7ac4b" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.17.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "1e1fdcea5051f6d03ba5d07a6e05ad10" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e9439edabf330d64ea237c65ddfc0e7c" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "08447849f75fe55e869edb54075b9684" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.18.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.18.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "8235ed3a474bada5c929bfe4a1a1b998" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b0125004e88f868ac4ead47da62eee9c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "871462f1f4a3befbf8e843f144da9a97" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.19.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "18f6ca2e5e8140418016df65d6f70dd8" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d1b94d7fd217eec8644f1b82d0925bb3" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ef05f26662406aaa151ee96f14560d3b" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.20.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.20.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "1bb51ea71c1ab50a84ab2f95c2c63175" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c30634533c2fc59d575615fa4ac9214b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c123a4af918d13adf4a76d66c23f8500" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.21.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "6554be6c98a0dc256082e9152d1f0e21" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2afc907981b853485b4978d87f9024ca" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fe186966e79790cc22bbeefbc75af8dd" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25174016, "records": [ { "name": "model.layers.22.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 } ], "md5sum": "4c76fe74edbf769084ab1171ed00266f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.weight", "shape": [ 16384, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6440fb6e14258b7f4dea6dc77b8262a9" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.feed_forward.w2.weight", "shape": [ 2048, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cc5c095b532c915b19d5718c3e6bfe4e" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 379060224, "records": [ { "name": "output.weight", "shape": [ 92544, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 379060224, "byteOffset": 0 } ], "md5sum": "a5b778eef7ec6da1957b35e1888c3333" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 25178112, "records": [ { "name": "model.layers.23.attention.wo.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.23.attention.wqkv.weight", "shape": [ 4096, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16777216, "byteOffset": 8388608 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25165824 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25169920 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 25174016 } ], "md5sum": "5735df772a182e0db0a8ac4b6937351a" } ] }