Qwen1.5-0.5B-Chat-q4f32_1-MLC / ndarray-cache.json
riczhou's picture
Initial commit
ac8c7ff verified
raw
history blame
103 kB
{
"metadata": {
"ParamSize": 269,
"ParamBytes": 387649536.0,
"BitsPerParam": 5.005399562680047
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 77791232,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
151936,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 77791232,
"byteOffset": 0
}
],
"md5sum": "47249f991afc8ef584e8c2435ca90870"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 77791232,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 77791232,
"byteOffset": 0
}
],
"md5sum": "47249f991afc8ef584e8c2435ca90870"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33329152,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
151936,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9723904,
"byteOffset": 0
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 9723904,
"byteOffset": 9723904
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 19447808
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 19449856
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 20891648
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 21071872
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 23955456
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24315904
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24317952
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 24324096
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 25896960
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 26093568
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 26617856
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 26683392
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 26685440
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 28127232
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 28307456
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 31191040
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31551488
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 31553536
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 31559680
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 33132544
}
],
"md5sum": "5bb7ad6255b35bf3e54d021b245bc9f0"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 31156224,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 524288
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 589824
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 591872
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 2033664
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 2213888
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 5097472
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5457920
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 5459968
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 5466112
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 7038976
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 7235584
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 7759872
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7825408
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 7827456
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 9269248
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 9449472
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 12333056
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12693504
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 12695552
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 12701696
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 14274560
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 14471168
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 14995456
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 15060992
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 15063040
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 16504832
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 16685056
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 19568640
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 19929088
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 19931136
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 19937280
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 21510144
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 21706752
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 22231040
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 22296576
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 22298624
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 23740416
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 23920640
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 26804224
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27164672
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 27166720
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 27172864
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 28745728
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 28942336
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 29466624
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 29532160
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 29534208
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 30976000
}
],
"md5sum": "991e0aecad9c88a5cc1ca0be85238687"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 32194560,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 2883584
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3244032
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3246080
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 3252224
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 4825088
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 5021696
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 5545984
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5611520
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 5613568
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 7055360
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 7235584
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 10119168
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10479616
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 10481664
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 10487808
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 12060672
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 12257280
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 12781568
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12847104
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 12849152
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 14290944
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 14471168
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 17354752
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17715200
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17717248
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 17723392
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 19296256
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 19492864
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 20017152
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 20082688
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 20084736
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 21526528
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 21706752
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 24590336
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24950784
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24952832
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 24958976
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 26531840
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 26728448
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 27252736
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27318272
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 27320320
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 28762112
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 28942336
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 31825920
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 32186368
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32188416
}
],
"md5sum": "cd0fce2641f8d5513fded0d5552fabe9"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32925696,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 1572864
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 1769472
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 2293760
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2359296
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 2361344
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 3803136
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 3983360
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 6866944
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7227392
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7229440
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 7235584
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 8808448
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 9005056
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 9529344
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 9594880
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 9596928
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 11038720
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 11218944
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 14102528
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 14462976
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14465024
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 14471168
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 16044032
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 16240640
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16764928
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16830464
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 16832512
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 18274304
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 18454528
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 21338112
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21698560
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21700608
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 21706752
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 23279616
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 23476224
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 24000512
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24066048
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 24068096
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 25509888
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 25690112
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28573696
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 28934144
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28936192
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28942336
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 30515200
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 30711808
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 31236096
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31301632
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 31303680
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 32745472
}
],
"md5sum": "4bebe31dccbfe72d305fc95478773fc2"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32194560,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 2883584
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 3244032
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 3246080
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 3252224
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 4825088
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 5021696
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 5545984
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 5611520
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 5613568
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 7055360
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 7235584
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 10119168
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 10479616
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 10481664
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 10487808
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 12060672
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 12257280
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 12781568
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 12847104
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 12849152
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 14290944
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 14471168
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 17354752
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 17715200
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 17717248
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 17723392
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 19296256
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 19492864
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 20017152
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 20082688
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 20084736
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 21526528
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 21706752
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 24590336
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24950784
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 24952832
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 24958976
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 26531840
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 26728448
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 27252736
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 27318272
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 27320320
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 28762112
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 28942336
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 31825920
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 32186368
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 32188416
}
],
"md5sum": "bd3887400fca31b3e10531bdc8589492"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 31303680,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 1572864
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 1769472
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 2293760
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 2359296
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 2361344
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 3803136
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 3983360
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 6866944
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 7227392
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 7229440
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 7235584
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 8808448
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 9005056
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 9529344
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 9594880
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 9596928
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 11038720
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 11218944
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 14102528
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 14462976
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 14465024
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 14471168
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 16044032
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 16240640
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 16764928
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 16830464
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 16832512
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 18274304
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 18454528
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 21338112
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 21698560
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 21700608
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 21706752
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 23279616
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 23476224
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 24000512
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 24066048
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1024,
352
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1441792,
"byteOffset": 24068096
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1024,
88
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 180224,
"byteOffset": 25509888
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
5632,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2883584,
"byteOffset": 25690112
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
5632,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 360448,
"byteOffset": 28573696
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 28934144
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
3072
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 6144,
"byteOffset": 28936192
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
3072,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1572864,
"byteOffset": 28942336
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
3072,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 196608,
"byteOffset": 30515200
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
1024,
128
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 524288,
"byteOffset": 30711808
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1024,
32
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 65536,
"byteOffset": 31236096
},
{
"name": "model.norm.weight",
"shape": [
1024
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2048,
"byteOffset": 31301632
}
],
"md5sum": "087583ac4f7465ef497959dbc5802c8b"
}
]
}