mlc-q4f16_1-h2o-danube3-500m-chat / ndarray-cache.json
Felladrin's picture
Upload folder using huggingface_hub
eaf1f51 verified
{
"metadata": {
"ParamSize": 165,
"ParamBytes": 288967680.0,
"BitsPerParam": 4.50113497363691
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 24576000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24576000,
"byteOffset": 0
}
],
"md5sum": "633e77cd70c09a1157f8c90051b657a9"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 30723072,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32000,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 24576000,
"byteOffset": 0
},
{
"name": "lm_head.q_scale",
"shape": [
32000,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072000,
"byteOffset": 24576000
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072000,
"byteOffset": 27648000
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 30720000
}
],
"md5sum": "dca9dd04b8518413856ec25481de4e12"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 32747520,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 3145728
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 3538944
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 9830400
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 10616832
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 10619904
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 12979200
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 13274112
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 14453760
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 14601216
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 14604288
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 17750016
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 18143232
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 24434688
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25221120
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 25224192
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 27583488
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 27878400
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 29058048
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 29205504
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 29208576
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 32354304
}
],
"md5sum": "acde503c622856d6448b2219453a9b11"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25669632
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28815360
}
],
"md5sum": "a9b819d6d1241df4d18fc34f3cd297ce"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25669632
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28815360
}
],
"md5sum": "8f1c9f6da63d1f79b9405093c969ee85"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25669632
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28815360
}
],
"md5sum": "ac5aea0e00011b11cc74943af756baa2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25669632
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28815360
}
],
"md5sum": "7a7cfa6e4a6a344163a815a0a3b85009"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25669632
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28815360
}
],
"md5sum": "43b8567c231fcd6ceb9d289e2a5e74c1"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29208576,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 25669632
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 28815360
}
],
"md5sum": "42f4a095eccd005499430d3995f1dd3f"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25669632,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 6291456
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 7077888
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 7080960
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 9440256
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 9735168
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 10914816
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 11062272
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1536,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11065344
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1536,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 14211072
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
8192,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 6291456,
"byteOffset": 14604288
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
8192,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 786432,
"byteOffset": 20895744
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 21682176
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
3072,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21685248
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
3072,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 294912,
"byteOffset": 24044544
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
1536,
192
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1179648,
"byteOffset": 24339456
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1536,
48
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25519104
},
{
"name": "model.norm.weight",
"shape": [
1536
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3072,
"byteOffset": 25666560
}
],
"md5sum": "8d3a7724603c587b5c168bb4500903c0"
}
]
}