|
{
|
|
"metadata": {
|
|
"ParamSize": 283,
|
|
"ParamBytes": 1807423488.0,
|
|
"BitsPerParam": 4.008977295278914
|
|
},
|
|
"records": [
|
|
{
|
|
"dataPath": "params_shard_0.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "6632bd7389535845b879dfa0f75f2876"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_1.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31481856,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.20.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.20.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 6144
|
|
},
|
|
{
|
|
"name": "model.layers.20.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12589056
|
|
},
|
|
{
|
|
"name": "model.layers.20.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.21.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14168064
|
|
},
|
|
{
|
|
"name": "model.layers.21.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14174208
|
|
},
|
|
{
|
|
"name": "model.layers.21.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26757120
|
|
},
|
|
{
|
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28329984
|
|
},
|
|
{
|
|
"name": "model.layers.21.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31475712
|
|
}
|
|
],
|
|
"md5sum": "8dabf0ba32d27198640c6ae060a73dbb"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_2.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "c1145df6148b9e95bef3091abf497238"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_3.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.21.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.21.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.22.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.22.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.22.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.22.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "23318c80c78c8cd77dbcd3ee0d7ec120"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_4.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f3f20461c8d1783d9f0a80674f0f16e7"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_5.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.22.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.22.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.23.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.23.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.23.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.23.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "0cf4af4293f1b2758c817dbbadf586c7"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_6.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "80c2231396e9396ceb97b697136be76d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_7.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.23.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.23.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.24.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.24.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.24.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.24.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "0735a35d5fc915234797f1fca93c6c52"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_8.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "7706da15f178fd2acc1ae783ae43c692"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_9.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.24.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.25.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.25.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.25.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.25.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "9635ec9c01cff00cd3b8f6fa2e9d9452"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_10.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "cfbc323a4f617827a377b62a014cbf5b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_11.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.25.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.25.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.26.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.26.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.26.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.26.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "0d9bd7e9d99660be0ad7e36cb14d2485"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_12.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "9113028fd8934e4be9dca84c2b05f144"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_13.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.26.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.26.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.27.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.27.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.27.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.27.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "935c02d95ce59974e8af64684736b7ec"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_14.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 197001216,
|
|
"records": [
|
|
{
|
|
"name": "model.embed_tokens.q_weight",
|
|
"shape": [
|
|
128256,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 197001216,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "8fce544e6cf0c33976a47fe4fa732309"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_15.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 24625152,
|
|
"records": [
|
|
{
|
|
"name": "model.embed_tokens.q_scale",
|
|
"shape": [
|
|
128256,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 24625152,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "f65d51b289b1d7fe0a4e1ba5754e20a5"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_16.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "eba2dc7af3624aed6f17b39d18294822"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_17.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31475712,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.27.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.27.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.norm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.0.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.0.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14168064
|
|
},
|
|
{
|
|
"name": "model.layers.0.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26750976
|
|
},
|
|
{
|
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28323840
|
|
},
|
|
{
|
|
"name": "model.layers.0.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31469568
|
|
}
|
|
],
|
|
"md5sum": "48decf80211842a439487b2364bfa6c3"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_18.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "59e1945a33969a79b72e819db55fdb1d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_19.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.0.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.0.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.1.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.1.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.1.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.1.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "46cf1310044d766793478ed957087c49"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_20.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "9ac2f0522d4072701540d391c4fed685"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_21.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.1.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.1.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.10.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.10.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.10.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.10.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "b3065e4f8f00afea2211f62a3094621d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_22.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "b45bb556b42263d11b33eb828cd4d4f6"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_23.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.10.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.10.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.11.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.11.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.11.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.11.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "6d5f81002086674b24e7f374362ff0fb"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_24.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "aedc8bd5bc464c7d33f2b8114adbd4a6"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_25.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.11.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.11.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.12.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.12.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.12.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.12.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "d094c2cf33122e807acb15b016140a30"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_26.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "273e4577ca8c4b00bcb31c58b516f88c"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_27.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.12.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.13.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.13.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.13.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.13.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "5f10156038894af339f81fe798393d15"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_28.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "bde3abec9e9065cf8a0cc36a0e620968"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_29.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.13.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.13.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.14.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.14.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.14.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.14.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "7f657637c85a279a6e980f8e02ba577d"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_30.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "5817e46f51dc14b5aa318c47e4c0a797"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_31.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.14.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.14.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.15.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.15.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.15.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.15.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "e49e23eceb5a5fa68118f5764887630f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_32.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "7e65a6ecebf0fd00e964b3be2131d484"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_33.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.15.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.15.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.16.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.16.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.16.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.16.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "fd29b628416b8e2dda5b5aebaad3d778"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_34.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "d37a74fa054faf909df5bc25050cb2db"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_35.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.16.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.17.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.17.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.17.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.17.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "7eff2433e734422f9c66b151c96325cb"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_36.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "0db8bad1921295ba89b4ee6c978cf4fb"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_37.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.17.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.17.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.18.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.18.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.18.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.18.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "c15c593dfac5d587e641fa4d70e9f017"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_38.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "b1075351b635b78ec694756703983a57"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_39.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.18.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.18.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.19.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.19.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.19.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.19.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "98f1e8e1a5936e8ffeb07a092c86292f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_40.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "42bdbeb135823332caedbdcbf4234e96"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_41.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.19.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.19.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.2.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.2.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 14161920
|
|
},
|
|
{
|
|
"name": "model.layers.2.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 26744832
|
|
},
|
|
{
|
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 28317696
|
|
},
|
|
{
|
|
"name": "model.layers.2.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "bfd292b35243b5bdc59fb3a15019440f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_42.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "8ebfc99a2debafc995b8be0fcec70a7f"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_43.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31463424,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 7864320
|
|
},
|
|
{
|
|
"name": "model.layers.2.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 8847360
|
|
},
|
|
{
|
|
"name": "model.layers.2.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 13565952
|
|
},
|
|
{
|
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25165824
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26148864
|
|
},
|
|
{
|
|
"name": "model.layers.20.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30867456
|
|
},
|
|
{
|
|
"name": "model.layers.3.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31457280
|
|
}
|
|
],
|
|
"md5sum": "2e3c0f2cd7adfade3b840299b7e82a82"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_44.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "1ae8771b963135e66fbd6cf956dc8b81"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_45.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.3.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.3.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.3.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.3.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
},
|
|
{
|
|
"name": "model.layers.4.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "9514cf59c22b500e60b983aaded04875"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_46.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "87af44325fc052fa7c27be599ff04df5"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_47.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.4.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.4.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.4.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.4.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
},
|
|
{
|
|
"name": "model.layers.5.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "534235fc2141823c8bbcf8411e051576"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_48.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "c11d531eb7d9597271e8559f6941e2c7"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_49.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.5.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.5.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.5.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.5.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
},
|
|
{
|
|
"name": "model.layers.6.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "685a22341bf39892eff222d44678919a"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_50.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "7c58967dc0aeb252b94f2fedac091693"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_51.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.6.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.6.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.6.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.6.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
},
|
|
{
|
|
"name": "model.layers.7.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "73b45458d4f9208cb4d550e31c7a51e9"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_52.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "3c66d7bf5f067b401a2a0c27ea96d4b8"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_53.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.7.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.7.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.7.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.7.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
},
|
|
{
|
|
"name": "model.layers.8.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "c24a899ce440188f6cbd6da4ce88e5f4"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_54.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "ec6c8323a07b58649574880fe603a51b"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_55.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31469568,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.8.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.8.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.8.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.8.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
},
|
|
{
|
|
"name": "model.layers.9.input_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 31463424
|
|
}
|
|
],
|
|
"md5sum": "3bf27d6645a93eafd4d019a21e5ded4a"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_56.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 25165824,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
|
|
"shape": [
|
|
16384,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 25165824,
|
|
"byteOffset": 0
|
|
}
|
|
],
|
|
"md5sum": "17eef79fc9969dc8de373faafad344ee"
|
|
},
|
|
{
|
|
"dataPath": "params_shard_57.bin",
|
|
"format": "raw-shard",
|
|
"nbytes": 31463424,
|
|
"records": [
|
|
{
|
|
"name": "model.layers.9.mlp.down_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
1024
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 12582912,
|
|
"byteOffset": 0
|
|
},
|
|
{
|
|
"name": "model.layers.9.mlp.down_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
256
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 1572864,
|
|
"byteOffset": 12582912
|
|
},
|
|
{
|
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
|
|
"shape": [
|
|
16384,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 3145728,
|
|
"byteOffset": 14155776
|
|
},
|
|
{
|
|
"name": "model.layers.9.post_attention_layernorm.weight",
|
|
"shape": [
|
|
3072
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 6144,
|
|
"byteOffset": 17301504
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
|
|
"shape": [
|
|
5120,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 7864320,
|
|
"byteOffset": 17307648
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
|
|
"shape": [
|
|
5120,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 983040,
|
|
"byteOffset": 25171968
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.o_proj.q_weight",
|
|
"shape": [
|
|
3072,
|
|
384
|
|
],
|
|
"dtype": "uint32",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 4718592,
|
|
"byteOffset": 26155008
|
|
},
|
|
{
|
|
"name": "model.layers.9.self_attn.o_proj.q_scale",
|
|
"shape": [
|
|
3072,
|
|
96
|
|
],
|
|
"dtype": "float16",
|
|
"format": "f32-to-bf16",
|
|
"nbytes": 589824,
|
|
"byteOffset": 30873600
|
|
}
|
|
],
|
|
"md5sum": "c8d3641d179867719ae7a28fd83e3aa0"
|
|
}
|
|
]
|
|
} |