gemma-7b-it-q4f16_2-MLC / ndarray-cache.json
CharlieFRuan's picture
Initial commit
91fe041 verified
{
"metadata": {
"ParamSize": 282,
"ParamBytes": 5933193216.0,
"BitsPerParam": 5.559536167513375
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1572864000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
256000,
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1572864000,
"byteOffset": 0
}
],
"md5sum": "c60082db0e4d8f35e7289fd7de2d6953"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "0a3f375cf1e2c9f695595fbdb0460894"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b1b4b490c123e9fda95c5ad6a114814a"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 6144
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4724736
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14161920
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14168064
}
],
"md5sum": "6c41cde98416da0cfce2f890191b72d4"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "b9b930f7da641c5a595835013ee6becd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "df24aca81ca517f0c6c929ea97f9606d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "19f5d91c818853f75337644c5d0460e8"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "914b8140b0803c1be930426d6e4f86eb"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "dda0ce191ffd36396f60d1dde7ecd6c1"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "fa05c4b0573277e0e25e9431b8ada963"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "970988da55e20eb74b4d7eb951f24e7a"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "895796fa01c5ae32fe106ae0c8251a27"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "704305446ba2953b62e9ceebadd07063"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "028dd1dcf35dac597489b07af9dfdb2f"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "a77612a121069421dd3899b31cdff6b4"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "2350bb04bee0a06eb34afd2647843d21"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "4fdf97af5d9e34f3e189cd5d9fa5e88d"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "040ea811b5b654731135e29293d63180"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "3111e33cfe433290fbff3775e89e11ae"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "15a6a1ec16ab97ad9601da73dcc298b7"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "c941e6d371dbfe46ca546ad50ccadab2"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ef2cc584a481627c4dcbc7c625ad6dcc"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
}
],
"md5sum": "ca0f9e187a584fc256c0230643afbbc6"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "5a645ad8a7ad21de07365029a99fe953"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0c37067c216179dca79b22da7274b3a1"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f303958ead6e2daaa3336967c361c85f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "b3ad0f343d0f749ffd6f8ec5b7c41213"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "36cb87785900c8b03ea04cadcd2c7b61"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "888fe098f5448c40531e0f73801a71cc"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "44056dd4b713c6d0c292d5876b50d1cb"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "8979fcbb6ffc9999a802dcb36db2c110"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "c1b503e36756e5fa64fa6d6c536935c9"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "cfa48f27791f04a6afc001c498096009"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "47cec6b9dd1fa4bd2b5f112b232f9b68"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "9911946bdd7cb423c25569b240fcaa84"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "21dfcc93791de79d358495202b300837"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "e99baebdb292d9f650b2ea5541b25c86"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "439729033342c3139e80c0b0414c1728"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "5d9354c58b9395b7e0e9b044725dcc20"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4291b97ecc1fab9b6515a05a8dbd24bb"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0f83efe157e1c158f2c27a24b2828d69"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
}
],
"md5sum": "d5f77226ac89a3c55700e7fd7812778a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "a24e5f432c8b50da8d46aaf81ad468ec"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d7bbc1d181bf43907a1a092f3c50f7ab"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "fd461ae89dccedcef9638e2cb48984b7"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2a0d3cff4f52ceec8e8f6c40af9c0a50"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 28329984,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23605248
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23611392
}
],
"md5sum": "4c1d3f579707ec96351b6caf43eb61f7"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 30676992,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9443328
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28317696
}
],
"md5sum": "e7b9ca8c828248cdf4e043d02e77398b"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "4c795c1fb95ddbc0651868a79f712180"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "61bf50a5ea090aa9cb45fedb191a8dd9"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "61b7de5a963bfe8f5d912e6a11fa5104"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "a83b0c205aac3c3191f9f8f1100581c5"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 30689280,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7077888
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7084032
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11802624
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21245952
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23605248
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29896704
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30683136
}
],
"md5sum": "641084e3ed190be0ad41361c1f891679"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "91068e71cafed5fd13a8b18b24a2abbd"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "c717ea3935ddf0deb10e7b0f579eedc5"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "d33d060e3517f6d3c4031d3860ada555"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d1e366e579be3b2ea952452d40c29871"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "95b35efde9d72c464a57053ebfb8302e"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "9c4ee949ac59a0d6e41cd9676b15bd8b"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 28329984,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23605248
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23611392
}
],
"md5sum": "0ab3121d00fb54cf47b02726f2f6e265"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 30676992,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9443328
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28317696
}
],
"md5sum": "1c5c872c45d3a9a18d4f13a7999745d8"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "8f4983e8d268430490a7bfd2089271b2"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b3562d605f03006f65eb486771e9e3f7"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "56bb44a8a76c8da8bed0a852d027b3a2"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "6cca4e4da80cc4104bcf42fe370c2e00"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 30689280,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7077888
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7084032
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11802624
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21245952
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23605248
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29896704
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30683136
}
],
"md5sum": "4944d992b1b0d8b041ea6c4762b11b09"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3d78ee2bc235a467be96bdce00bc4579"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "a756e530dee48ebb741ead864edbc566"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "e7ca1640cce62a24eb185527535cc2a4"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "21c78957b1967bc0e9d8a0c667226b53"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "dad0426f022ac9380153e56b771a205c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "e6518b4dd0883453191e06ed464c5145"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "5a31c95c108b21de43eb104e4c32b9c5"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "bfcb8919f7eb1a560c59de147e4644a9"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "a17ed1d6946390eec3d604ae75cf0716"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "2b21178688c488bfcf39ba2e5e976744"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "36ae8787b767df38c9911ad4534b4ff6"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4dfbe51c1e27f3c2163064d8bf1fe47b"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "9ad5a6b391692cd7ba2f211394f4841d"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 33048576,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 33042432
}
],
"md5sum": "142d51dc1cffe753399d2e152731067c"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "e0de82327d5ecf92c4d6a7bbe8e903fe"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "73a8a7599dc02a49a1b61f70c01b6994"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "923e90249de0a8cc9c2bfd2fa160e42a"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "377b2fd5902e392774e9dc38cc649031"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e8e9c6fe7760ae3517a0e34e1ede66e4"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "c33d5fe78f612f2c382f21b7ff2e0466"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 33042432,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23605248
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 25964544
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 32256000
}
],
"md5sum": "ce934c7e6c41e76269127505d5ddbc4e"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "0151efeb7b93d74d86dd9291f0266ded"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "08663cb64732b524fb1515204569256e"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "8587334165a82190371cc1dce4c975f8"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "4264e7b9a235389f27b36fefa35d0d55"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 28329984,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 9443328
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 14161920
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23599104
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 23605248
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 23611392
}
],
"md5sum": "ff0d65b3d60947dcd0bb54b130e7054b"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 30676992,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9443328
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28317696
}
],
"md5sum": "d379684e292777f3e0feeb5c8c0f477d"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "94bd6c4a7893492f003ec40434ef33d9"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d51d6761865d3fec1b9ae7f6e6a12fdf"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8091897b86e142f4ad2788235ea7f01f"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 37748736,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
3072,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 37748736,
"byteOffset": 0
}
],
"md5sum": "c300ececd456b552ed7999a8b9da9b97"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 30689280,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7077888
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 7084032
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11802624
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21239808
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21245952
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 23605248
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 29896704
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 30683136
}
],
"md5sum": "0329898e07322fa4c059b54dfe321db3"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
49152,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3723ed2c9b615ba20a1469402e68ed5e"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 33036288,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
3072,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
49152,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 4718592
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14155776
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
12288,
384
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 14161920
}
],
"md5sum": "652911ef304a85f54ad9655496c6b050"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 9443328,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
12288,
96
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
3072,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 2359296
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
3072,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 8650752
},
{
"name": "model.norm.weight",
"shape": [
3072
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 9437184
}
],
"md5sum": "3ab9be0e91ef4f8193bdd1d5fa3189ed"
}
]
}