internlm2_5-1_8b-chat-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
c4f4b03 verified
{
"metadata": {
"ParamSize": 147,
"ParamBytes": 3778220032.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "13620811167b805f8ea2a1d99fbd406c"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "562607cf49908cc655f991e0d55bcfab"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.0.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.0.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "cdef617556087435d51959e9e9ed610e"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "439c2ca4b319f35142265f6f82c5d302"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7d65f1da58141a06abbe4b13a0940dee"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.1.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "c43d939dc697ae883fa557c3a6acebfd"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "a3897ce16b64b2952d540145eb9a0285"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7feea0d894294bebc16db150da47e608"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.10.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.10.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "06fd9fccc1cc527bdcdc0604ac15d86f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "9078eec7783ca112e36c3a2684e1a867"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a6fa373eb1a0079b814769b897f0d07c"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.11.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "55fbc26500896d2e10b7e7db5c83c0a5"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "065d4381d53a30b79b19a0179d9c4a45"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.12.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
}
],
"md5sum": "ebf9b884f5693236428d94869c969adc"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b7613846d39ce5f62b2f1dd30a9746a6"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0a194ceac00e1e59af2d7127b3c8cf2b"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.2.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.2.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "133438c2ed5e05aa25d9b0722be6ea20"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "424adead5dbf5c309417525896bc5577"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fbf2d36b86b2c09a93a4a1c9840dd781"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.3.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "9841633cf8016dc1eb4bc2961a3f94aa"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "47f542e9709957aa8af72faa952d486f"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "de5cc4de49eb2939d747f591db5fd452"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.4.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.4.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "e8a0aced0a7edbb64191d23f342bb81d"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "8f5e3eca294b217de661233d3904c69d"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f6f3b1f080fbc31c85636b8b542c1cc4"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.5.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "ec22329c2b39d676c61b4f7a13ff1955"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "6aa83a5244c843e8abdf7eaa4bc5c3d2"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9c46bb790229cdd669d265e0ed1b1bdc"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.6.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.6.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "39521843a30d216fd8e46c677bc07eee"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "79bf5afe398fcdad7933e47b6fb5b76c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "6d53d9f9d79dc2d8e80e0bc1307df84d"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.7.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "579d79fff40956baa246862fe41f8f68"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "872c26bf62854b30b5ceca2b0b2e133f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "9d9ce49d43fe40d22fbfea0b6189f699"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.8.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.8.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "371871bcf96a27dfa0110a0c1adaca7a"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "1e200f5c650647936f0e504ed468e200"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "64f3c7ab0cef1b04cbbf507f585c4f1e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 379060224,
"records": [
{
"name": "model.tok_embeddings.weight",
"shape": [
92544,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 379060224,
"byteOffset": 0
}
],
"md5sum": "22a60526a41a1ec740a36e930ab628ea"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d67ebba07546dfac1203b260fc1d96c4"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.9.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25174016
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25178112
}
],
"md5sum": "fea60b0b5b39f57a50b5f79fd5db77f5"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "7336040e0332518e23a109a77622c95f"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1bd18ed0fe66988af08bd9a3678123f6"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.13.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "81fc643e24286d745d9d3e9db68fcce7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "71c1416f7db0cba954b133615bff9451"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "dfa4ae9fb4158eca19611a07d3b249a5"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.14.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.14.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "88e34648f33e99e5d74a34a259ba97a0"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "3678f7b6adcb837bb5471013aa8a677e"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5fcd3ce8a74f83581106483947f5a39f"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.15.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "5d3f1ee1d9118aeb8d3a4a6dbd1d1a80"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "f030e01a341f26c32ef40cf8e2e56671"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "aca438bd5c0a97326bc9cb4f40ba127c"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.16.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.16.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "f1e82431d8dc0005139f5c588055e091"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "acdaf4671c25f9d0968de5dd7f328fa0"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2f98fa680df4cd9396b080e39ba7ac4b"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.17.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "1e1fdcea5051f6d03ba5d07a6e05ad10"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "e9439edabf330d64ea237c65ddfc0e7c"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "08447849f75fe55e869edb54075b9684"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.18.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.18.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "8235ed3a474bada5c929bfe4a1a1b998"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "b0125004e88f868ac4ead47da62eee9c"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "871462f1f4a3befbf8e843f144da9a97"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.19.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "18f6ca2e5e8140418016df65d6f70dd8"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "d1b94d7fd217eec8644f1b82d0925bb3"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ef05f26662406aaa151ee96f14560d3b"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.20.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.20.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "1bb51ea71c1ab50a84ab2f95c2c63175"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "c30634533c2fc59d575615fa4ac9214b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c123a4af918d13adf4a76d66c23f8500"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.21.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "6554be6c98a0dc256082e9152d1f0e21"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "2afc907981b853485b4978d87f9024ca"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "fe186966e79790cc22bbeefbc75af8dd"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25174016,
"records": [
{
"name": "model.layers.22.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.22.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
}
],
"md5sum": "4c76fe74edbf769084ab1171ed00266f"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 67108864,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.weight",
"shape": [
16384,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 67108864,
"byteOffset": 0
}
],
"md5sum": "6440fb6e14258b7f4dea6dc77b8262a9"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.feed_forward.w2.weight",
"shape": [
2048,
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "cc5c095b532c915b19d5718c3e6bfe4e"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 379060224,
"records": [
{
"name": "output.weight",
"shape": [
92544,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 379060224,
"byteOffset": 0
}
],
"md5sum": "a5b778eef7ec6da1957b35e1888c3333"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 25178112,
"records": [
{
"name": "model.layers.23.attention.wo.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.23.attention.wqkv.weight",
"shape": [
4096,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 8388608
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25165824
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25169920
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 25174016
}
],
"md5sum": "5735df772a182e0db0a8ac4b6937351a"
}
]
}