CharlieFRuan's picture
Upload folder using huggingface_hub
34e9049 verified
{
"metadata": {
"ParamSize": 267,
"ParamBytes": 277996288.0,
"BitsPerParam": 4.501665573729716
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 68067328,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
151936,
112
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 68067328,
"byteOffset": 0
}
],
"md5sum": "d18e2f2cdb906934bd61dc55f8c53734"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33234176,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
151936,
28
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8508416,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 8508416
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 8510208
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 10689280
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 10961664
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 15319808
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 15864576
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 15866368
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 15868672
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 16384768
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 16449280
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 16850688
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 16900864
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 16902656
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 19081728
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 19354112
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 23712256
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 24257024
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 24258816
},
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 24261120
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 24777216
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 24841728
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 25243136
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 25293312
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 25295104
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 27474176
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 27746560
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 32104704
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 32649472
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 32651264
},
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 32653568
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 33169664
}
],
"md5sum": "fb4e42666797f378eb2f8052397485f8"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33505280,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 401408
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 451584
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 453376
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 2632448
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 2904832
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 7262976
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 7807744
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 7809536
},
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 7811840
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 8327936
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 8392448
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 8793856
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 8844032
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 8845824
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 11024896
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 11297280
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 15655424
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 16200192
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 16201984
},
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 16204288
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 16720384
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 16784896
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 17186304
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 17236480
},
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 17238272
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 19417344
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 19689728
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 24047872
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 24592640
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 24594432
},
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 24596736
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 25112832
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 25177344
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 25578752
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 25628928
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 25630720
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 27809792
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 28082176
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 32440320
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 32985088
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 32986880
},
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 32989184
}
],
"md5sum": "86688fa8fa922ff75b1c25f40327cc3f"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 33053696,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 64512
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 465920
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 516096
},
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 517888
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 2696960
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 2969344
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 7327488
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 7872256
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 7874048
},
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 7876352
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 8392448
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 8456960
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 8858368
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 8908544
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 8910336
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 11089408
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 11361792
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 15719936
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 16264704
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 16266496
},
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 16268800
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 16784896
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 16849408
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 17250816
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 17300992
},
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 17302784
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 19481856
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 19754240
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 24112384
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 24657152
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 24658944
},
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 24661248
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 25177344
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 25241856
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 25643264
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 25693440
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 25695232
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 27874304
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 28146688
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 32504832
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 33049600
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 33051392
}
],
"md5sum": "b8dbb11271cd3ace5637744ba394edad"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33020928,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 516096
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 580608
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 982016
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 1032192
},
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 1033984
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 3213056
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 3485440
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 7843584
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 8388352
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 8390144
},
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 8392448
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 8908544
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 8973056
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 9374464
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 9424640
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 9426432
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 11605504
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 11877888
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 16236032
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 16780800
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 16782592
},
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 16784896
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 17300992
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 17365504
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 17766912
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 17817088
},
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 17818880
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 19997952
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 20270336
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 24628480
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 25173248
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 25175040
},
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 25177344
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 25693440
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 25757952
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 26159360
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 26209536
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 26211328
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 28390400
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 28662784
}
],
"md5sum": "4ca5d8efca18c6e10bac09311ae70c97"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 29211648,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 0
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 544768
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 546560
},
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 548864
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 1064960
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 1129472
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 1530880
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 1581056
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 1582848
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 3761920
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 4034304
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 8392448
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 8937216
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 8939008
},
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 8941312
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 9457408
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 9521920
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 9923328
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 9973504
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 9975296
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 12154368
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 12426752
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 16784896
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 17329664
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 17331456
},
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 17333760
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 17849856
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 17914368
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 18315776
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 18365952
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 18367744
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 20546816
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 20819200
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 25177344
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 25722112
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 25723904
},
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 25726208
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 26242304
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 26306816
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 26708224
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 26758400
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 26760192
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 28939264
}
],
"md5sum": "5e85bca69ff4405032e129828b7e522c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33297408,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 4358144
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 4902912
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 4904704
},
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 4907008
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 5423104
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 5487616
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 5889024
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 5939200
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 5940992
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 8120064
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 8392448
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 12750592
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 13295360
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 13297152
},
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 13299456
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 13815552
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 13880064
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 14281472
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 14331648
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 14333440
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 16512512
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 16784896
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 21143040
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 21687808
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 21689600
},
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 21691904
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 22208000
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 22272512
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 22673920
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 22724096
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 22725888
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 24904960
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 25177344
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 29535488
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 30080256
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 30082048
},
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 30084352
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 30600448
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 30664960
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 31066368
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 31116544
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 31118336
}
],
"md5sum": "36034840023222d03d21279a1c06131f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 14605824,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 272384
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 4630528
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 5175296
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 5177088
},
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 5179392
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 5695488
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 5760000
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 6161408
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 6211584
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
608,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2179072,
"byteOffset": 6213376
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
152,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 272384,
"byteOffset": 8392448
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
112,
9728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4358144,
"byteOffset": 8664832
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 544768,
"byteOffset": 13022976
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 13567744
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2304,
"byteOffset": 13569536
},
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
112,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 516096,
"byteOffset": 13571840
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
28,
1152
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 64512,
"byteOffset": 14087936
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
112,
896
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 401408,
"byteOffset": 14152448
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
28,
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 50176,
"byteOffset": 14553856
},
{
"name": "model.norm.weight",
"shape": [
896
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1792,
"byteOffset": 14604032
}
],
"md5sum": "95982973d35126d07f2d27f6dae6a48d"
}
]
}