Command-R-08-2024-q4f16_1-MLC / ndarray-cache.json
TNT3530's picture
Upload folder using huggingface_hub
1b70b84 verified
{
"metadata": {
"ParamSize": 443,
"ParamBytes": 18167250944.0,
"BitsPerParam": 4.500119595955907
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1048576000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256000,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 1048576000,
"byteOffset": 0
}
],
"md5sum": "c5d3083a8d2422daf2610c41497cec40"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 131072000,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256000,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131072000,
"byteOffset": 0
}
],
"md5sum": "7803fac45026b4eb384f417c0280667d"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0afc7229eae1c639faf96b6260671d94"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1b91984460baf05b58779a4c13305791"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f2c07997dfce37433288314e92a3f67e"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3c1478590723af580bea955d3f87caa1"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.0.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "42c89d53adbb12e7cae97c2ca6351a86"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bfd2e487577efae3ee789fe63f196604"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "53f803d643cf981f6c351b99c2f05701"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25165824
}
],
"md5sum": "fed82e0fa7c3b451ac98c6fd77a50bcc"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e4901c356dd921daaaaabeb4e93ed045"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8ba676e3cdbdc35fb3458a22934f2b54"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "5bbd566f342f53fd8d8dc1f8c73c630f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e5ee649a9f770cfa219cf6feadb25d1d"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "079dd6ebb390f9d465f2223f6fe1d6da"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "adb32162fa406916fb0c014a6221067d"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.1.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "03a2807d916a555288256c9abe0cac88"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "afe5bc10f587902b1c0ceaf606101bb3"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "944f559b66186092f678817827f2e2f0"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "84f09462e4dc4f6900d6b2a500544196"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "c76fcc04383e93295c80717faffb6050"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c1e7030e13b7630f71838a60849b7b95"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7b1950ca371f6ad968f75f410a8d9e1d"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 22020096,
"records": [
{
"name": "model.layers.2.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
}
],
"md5sum": "f9a63d3eef7c3c737abdbd0f76db0219"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "686d8883378b018a30e369c985db3335"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "90633cf233a2913b73fcbe1e30c7e457"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f2e9330bd5bb74b6ccb03170ccc241f1"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.3.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "725cbe3e3ffdbdcaaf19ab5fa56731d7"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3da1933d4a1e62263e44f96c04cfb3d2"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "edf897db0117039e83845b1c45698c64"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4194510b9f1ce6b5089bec74593fb195"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.3.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.10.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "f48d4e86bddc15bb0e9a3b2b3a4591bc"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0a269c9285d58e94a92c01de8a0a7f4c"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "38d90096f46c2d30cb78ea7ba371a91b"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.10.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12582912
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12599296
}
],
"md5sum": "75db9789bf33dd881bc650534ae6e901"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "59947d599054123161fd0ea1ebbe4801"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3406c5420dc22c98b96516fbbf92d0b0"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.11.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ca886e8e77d7b3e0da0818c48c83ab83"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.11.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "0c4bc3848e8a9967d79def11eb09efd2"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b668d5aac90dcef2dc5a7a41e241ea6a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1f3d89755746afdccb5154f349b8ba1c"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "44cdcb58cdf5bcbbbc51a39362e9ce75"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.11.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.12.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "da697ad66fefd5df7f471044783f7c46"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "fd7c16477b79550e26b4cec4ce235d5c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.12.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c9d30538fb9ec34f56774cf9adeeaf3d"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f169df3a7bb7bb647c2d325d94f4607c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 22020096,
"records": [
{
"name": "model.layers.12.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
}
],
"md5sum": "80e9f45401330eeb99ce84726a4d8498"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d689223b8f75218b32b9b7f9a8969c1c"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.13.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "037cbced0d23236d0d9ec3bf674e76cd"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "d6098063522b220cfd18c6b1d12535ac"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.10.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "f406b7ebc6f7695e02c373f3a68af4b7"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "917e4c38575ec3db8da3e0c357d897c6"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 31473664,
"records": [
{
"name": "model.layers.13.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 22020096
},
{
"name": "model.layers.10.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 27262976
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31457280
}
],
"md5sum": "91f74fb6625af2bff9e50c2897e7d625"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ba69d653615b49c77dfd3cfeee076db9"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ba8533f1fca7c25dfef335fd69d475e2"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "9da80bb55a6a4cbc8e1df3b54ce6f7f5"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "3b2c120b449054347cac15895aa84f29"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.7.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "405f3edc7a45d9c744862da373043043"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "015e81dda9a55129afc029751083c070"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.7.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "9a534adb4949e7a2c050ee0d856b1c8f"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d51576deff9cf5bf75287e149bfb58b6"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2d6ed7e5889220dde7b91c404c2d81cd"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "2ed4c1b2b1a441971b058cd92f1e9268"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "5ad96899a5ba1307226ed6b142d1b950"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.8.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1ffa378d0d4fbf4a5008c8ea0b6cfb8b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "abe9dae34a4e82ca780e99d3caa4dca9"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.8.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "18f917be7d3f8de0d35abeaf86dffa4d"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "548dad444ee926b5fc0136e18fdd0a7e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "24e98f9c20bdcc642d89c7bbbfb03b3d"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "652b846ec021a280de26e927073de0fd"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "f749f74fedf9a0f67bad4ae8ba29c7d5"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.9.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c8ec70a873be313309ac711a95563115"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "68281413defca344d3cf843f513edafb"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.9.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "0ad094bea81b706b64c3be254291f8f2"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c7a1dd448ed473d7b99969ea577b2924"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8d3d7f403e4c860f9ed4880653581669"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25165824
}
],
"md5sum": "fa43d51bffbea4113a199e44b2a459c9"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f08490dfe9dc90d32d434416b536ccd6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "81cb7466454d52fcf006af172f19f450"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "76da0419d600b090617b43b7a2e3dbac"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "94a248d8a4f6ef8f84ccc450847924c7"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.14.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ff0db7ff6165542413f02e4cd2c1985f"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c93767584728faabfff9ecb2b58b9a0e"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.14.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "478bb4e7a1bf868a5885152a04369330"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "65c906cc2701e6f2d2a84304ce773744"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6ec4687ee858e51f77d1c4fe4f204473"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "e3dab2848244f765e36d9196a78ebd94"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "0ec87181e257c60eb1a2ebbb7021308e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.15.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "19fb8028f448d429be7606a231d850d1"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7f5444a1116a8c987a889d09883dba66"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 22020096,
"records": [
{
"name": "model.layers.15.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
}
],
"md5sum": "11fcdec78b92b76958e71655980c98fd"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1cbffa7320e0a769fae1e4dbb33e6400"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "1127a41b2436a11b54e120422130069a"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.16.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2742c6ae91c91130bcaf0111960058a1"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.16.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "4c78f8190bdc62f552d8984ce90fb903"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fb46ae4159883080a34d6b441f33f917"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "aae771fb01ecd01f5d37d1d159aff989"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7869cd97245e9cb5bfb7fedeb1ca0243"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.16.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16793600
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16809984
}
],
"md5sum": "2c00ec279c700fd6f51b0c021ce59726"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1c5ef2f9c2743f442b28cb838bf86cff"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "37cab0c86b9a9c9eb516befab94b5ec3"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.17.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "15dea361ba7c95cc842a6c6022b9f33e"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.17.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "2e0a031c58b69af93f8270c8567d6417"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "83e7e3facf1dbf309df1fe3d5b087470"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3b07297464f268dd52ed12231d2eedbd"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "00a10d74afcd1e3f2b2429c4be0cc573"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.17.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.18.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "6631e2352efc1e043f0eef1193dbf91a"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "993ad5d624ba876219cf66cd2bd83387"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.18.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3450f3cb9a3b3838b4c8ec5c711469e3"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4dc67f53973ed09eb2376f511cbf311c"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.18.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "2996772cb075a3dbcc7b20ef79fc8fc7"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "dd9f5c700653da2587cd2deaa1965e82"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "14642b02b288186c66d6bfaa739ac2c3"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "2fb93bda10a9399a95115b98aad0cf1c"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "367be01a41c50f2d3b7a1509d905e2cf"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.19.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b772e6bf541aeb748e3945239bc92a14"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "be01e96c3a6e1e377d1f6874684f7ea6"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.19.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "495bdf1bf40d8b14b36266169b2d2a48"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "655cb4f25810c8f90181f5c7bce02208"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d2206c47cdfa657d10794b69df10b9c5"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "4ae5027e39bb6bd3846daaad68045f86"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4cdfed76ef491702013ab7648330cdaa"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.20.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "a0f6359cdc39d60a6aedb4a49b3c79fe"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "11a3b97435d1c5d7800e68393ef52121"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.20.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "17257d7b5af3db95e963f687b4a5c6da"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "7671014de5b454551a343f85f0109ac3"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b5a86d1864e732f004a34d80051ba768"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "2d7a1c28dab3b70b797674da26155aa4"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "90766be437f1368f8b276f52495dd0f1"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.21.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "199c94fd208aa04aaa9068a1029abbb7"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f2aca788f52705fd85967475c2d21577"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.21.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "13030cefb4e3438db73dfe950be2c1e3"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ed664daea3633ef3e9308c18be257cbf"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "80478eb1621720dbc7c2fbe1ba4cfda1"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "407fa906d3d59ad97e67be1d1214d47d"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "57d43cc6b138c2a87ffcb5c5d63c2fde"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.22.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2efbdf41a1b27b74298ea63bb4de78bc"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9ff16b92fcc55496bb68a685aacf06d6"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.23.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "081ed74f16224b00a516969822bd3333"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e4fddb5dfdf50023bca7e1c5a83cd3a9"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 31473664,
"records": [
{
"name": "model.layers.22.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 22020096
},
{
"name": "model.layers.23.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 27262976
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31457280
}
],
"md5sum": "253614d5205de802a378c2b94bb42c57"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "bb867b647c2776c7d4c41af83e46d532"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8cbb37a698fab5227021e126696c3170"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "0bc0a49c78d18553f5564de70f9e7bf9"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "27e76439a97e8818e4c65f94ae5e3f39"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3df912b130150912f7bfb04e9581fd5d"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.23.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12582912
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12599296
}
],
"md5sum": "d50f7eed9eeb03756832db326d0ea418"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "febebbeed03e4ee55b884547f182dab1"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4aea462cc5cc9d6752889cf51a54f5a3"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.24.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "8363cbc4da7eb114810f018fa61cefd4"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.24.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "604b3b48077ea664ca891f25a8772a1c"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "54adc99b63645320eca047a0d53002fc"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ca67b637983f73b6340a12a6ccb3b45c"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "883be0d729fc369e3f8c07f52b15cf4c"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.24.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.25.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "e6f5701d0e23e244c65a4ad8a1c2abc7"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "78cd3b237390de448a7d9517eb18ca04"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.25.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1b277acdc6d03d212bb3525db57ec101"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a98938f7e5b46f699da8a8232241bcfd"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 22020096,
"records": [
{
"name": "model.layers.25.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
}
],
"md5sum": "2739734e3cd89b2ef79360e9651837f2"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "7d9c021ea3e9e5fb13ee034ffc116943"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.26.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "1372ec4418bdc3e96b8feff440f66c56"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2796a759be023fca18c4e49b2a3b4ba0"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.26.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "3e6218b6c95f476ec4cbc8d7eb030104"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "95fd8aef850cdba19793ff8ff433781e"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "012c331c996b173e946f0bb5ea397ae2"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25165824
}
],
"md5sum": "b3885da6335e2090600faef6f7216c64"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "efc4cd6c926916d01d90697347734b2d"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d21b913c175a28da5b67c9725ef16a6f"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "920158616356d9fb750713ed0e8ca0e4"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9ca35b4e3a676c595945f6b7e9fc4810"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.27.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "adbee9e546334ff25f1cc376fce2d95a"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9ebedb7dd3e1c6b6a81ef5a52bfbe2c2"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.27.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "af12949f9a754ddcfd9c2fbd0cbae023"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "71f3fff4cd5d55f0139c37032e9a7bb4"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0ee5c5d9d64f80751367c7cd40d0f69d"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "6376598b3f7d25978390e336d9e2da7d"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "b3f54f027d412bc0082868438cd57ed3"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.28.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5411a9d9bcccd8ec61298a9e1a3a4946"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5877862e949bc1e260c9aa932b1ca47d"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 22020096,
"records": [
{
"name": "model.layers.28.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
}
],
"md5sum": "18e9b23348e69f1052442ad57f165a4d"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "36ceee473da91077bdd4c9ae0c6b3e8e"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "8fd0c54270c792fe5560bf3f7efc0198"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.29.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "b70c33126a3af7c7b972fff3254b8d3f"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.29.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "bea5b7a41b57226ba5b8590aec474e3b"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4c0dd84b7531b769a9e7d0f27229a694"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fe31a3ff8ec2470c39e56611731d37d6"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "de1e1ac0224210e08d27d9ebea80c9b2"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 29392896,
"records": [
{
"name": "model.layers.29.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 16793600
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16809984
}
],
"md5sum": "d92fa4ed737b5fbb947da65531cb6e0b"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b3e074b27aeb78002c61f652ccfd0b3c"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "7d17c52fb692339750b24b76d2a8ba95"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.30.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "91cdf175a3941e4a5eb4394c04d35cd4"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.30.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "cd65c9ec052b2318fea7736e1567f013"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8948ede8e2d3b135c7e03356a4029c3e"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "75cf21a649b1e5d5f09c2e828d2bb019"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "23a53fb92652425f539b2ab7ff532bc2"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.30.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.31.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "4c26d7045b8339b0519930cdb0bbffd6"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "09c04e2ee2608e8e7563dc653f8c9085"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.31.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5178d2eb7e8d28c95f4ac30e1538fbee"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "78e488566db95738296574ddd47e58d5"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.31.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "78e1e1e5974a206e5c66b94b01998cd6"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "911e19d18e62f55a6377d85cdf79ef90"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5175e14d1babdb641505d3d225213028"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.32.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "9ba4a16ff6ddf0ac05b5ac84a46e72a8"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.32.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e261f0f837929c1353eea9bb62dca829"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.32.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "0a6690417d9c40185d4aa3a6079e0acb"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0eb24a0d0f54ba6c56319a9ca758406e"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.32.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.32.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "1e070d79aae81a1fee5471080d03706b"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3be032f6fde4b0e2c9f06a00a61d9752"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fc6241a3cb59754735cc2d80b492934d"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12582912
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12599296
}
],
"md5sum": "cf8167ca3822a2425c8465945af0124f"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "79cd624f93cd67ee7dbe9c81a86f4ee0"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "01d03f90201c018cb9c7684240e0ab3b"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "42a88c5e991f56bce906684ec52b8ad0"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.4.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "ce528af5c7a5b5b5d69907e120110876"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f1d4a53f2a843cef2f0e7e02b2d242e2"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0991b1a4a33776ee2502664f69834c38"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e2d67bbebf8a4c55139918775c7513ec"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.4.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.5.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "714f0f1950bcc69858b4907c83826396"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "e7b47ab702cf311fd65950e0b6e68e4d"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5170efffb9e4c7d0a8d75395894132d3"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "06483c2b255842e78204368a479eccb1"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.5.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "6ef7f176a06e94352ed0baf0bff1984e"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "aa8a768fc3d7c7c1b692d813bb394193"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e68a6b600dd18121bc0e0446fd48badc"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "648c4b4062a80a75cf6408023b6e8901"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "4eeab07ac289ae1220341569749ba8bb"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.6.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "43cd0011a165b7f4fbb269b919934c27"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fe79c2bf46b6bfe9f0c15d428645beed"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.6.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "44fc71eda2ae0a0e3c7445d8f903b21b"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5d6d8433304184cc5d97fdb2bffc6782"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "49e67bad8e07a96d58c588321eac19de"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "a2405a01971b42ad2e2908e6d316c6e3"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.33.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "78effae2f932c438d62ad2c573a34578"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.33.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "e77e05ff758051bbce50fce302e71cd5"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "36a79fd9dac2c22e97eeea2ab5813994"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.33.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.33.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "2d801bcab451a0742c7b6f9bf65e9673"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d479c98ba0ed88bb8f8bfbcd9282b6bc"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "777c53d37f58448e61ee7dfd2ca8124b"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "4e4d4f4c232e4445bc9f89d7bc95012b"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.34.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "deca0639b0ccef6bb27fc0e512489b9e"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.34.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "ee44f2c06fdcf9e12cb4e5370c0c788b"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2a35d2d0db32d2e8afe6bb1305440f51"
},
{
"dataPath": "params_shard_242.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.34.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.34.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "1380ba41e27b95e4e1412704e004c6df"
},
{
"dataPath": "params_shard_243.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5c416e5c89ec8817abd2ab846720d9c0"
},
{
"dataPath": "params_shard_244.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f88c1a5a019f1666b8ce54f05d0a1f2d"
},
{
"dataPath": "params_shard_245.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.35.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "62591bfb3a04cad63af54edab2567674"
},
{
"dataPath": "params_shard_246.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.35.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "76c78b06f7c8b9cd6ddbef59abbe6b6d"
},
{
"dataPath": "params_shard_247.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.35.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c198d239a04aebb43686abca3d18f85b"
},
{
"dataPath": "params_shard_248.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.36.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "a569ea42fab92c3da4b1d6856293ac24"
},
{
"dataPath": "params_shard_249.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.36.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "64d3e0da35a237dd06d1ab7923a5e30a"
},
{
"dataPath": "params_shard_250.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0ed7118aaaa0aa982ac26216ec129108"
},
{
"dataPath": "params_shard_251.bin",
"format": "raw-shard",
"nbytes": 31473664,
"records": [
{
"name": "model.layers.35.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.35.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.36.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 22020096
},
{
"name": "model.layers.36.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 27262976
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 31457280
}
],
"md5sum": "1ec2260ca83366c0d82b205ddb074816"
},
{
"dataPath": "params_shard_252.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fdad05fa73c5aa520564b4d1ff014020"
},
{
"dataPath": "params_shard_253.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0af6a81bd74a8640fca3d877b41fb836"
},
{
"dataPath": "params_shard_254.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.36.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
}
],
"md5sum": "c59a0f911667811a32eb8f79eb2edf18"
},
{
"dataPath": "params_shard_255.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "987c2303266ef4bf72bce6e6fddb686d"
},
{
"dataPath": "params_shard_256.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d361fa9fd4f1d532cb97fbb05dcdaf41"
},
{
"dataPath": "params_shard_257.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.36.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 12582912
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12599296
}
],
"md5sum": "340e73f5b4c4942fe2539a9fc79f59b1"
},
{
"dataPath": "params_shard_258.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "def9e645888cdf9276216a82e15786ef"
},
{
"dataPath": "params_shard_259.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.37.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "26c03b8779ac0c2c83f163dea7662270"
},
{
"dataPath": "params_shard_260.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.37.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "3a53777ec83764d049d500d065dfcb0f"
},
{
"dataPath": "params_shard_261.bin",
"format": "raw-shard",
"nbytes": 30408704,
"records": [
{
"name": "model.layers.37.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.37.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.layers.37.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 25165824
}
],
"md5sum": "a9525c5314c0cc813a1a56ffe761cba7"
},
{
"dataPath": "params_shard_262.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "fadcb5d67060b71bb3e4205a0fcbdbef"
},
{
"dataPath": "params_shard_263.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a72a090402be0649130c7a217297da02"
},
{
"dataPath": "params_shard_264.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3ddbfdf5ddc4fc19c283bc1c9bb7efe3"
},
{
"dataPath": "params_shard_265.bin",
"format": "raw-shard",
"nbytes": 29376512,
"records": [
{
"name": "model.layers.37.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 0
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 4194304
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 4210688
},
{
"name": "model.layers.38.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 16793600
}
],
"md5sum": "f44f4b29331fe6735429ef373f4ad943"
},
{
"dataPath": "params_shard_266.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.38.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "303b5db25f694fa910bc3c8509f6f515"
},
{
"dataPath": "params_shard_267.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.38.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "c4e795e1e0ee7fa92192db669292920e"
},
{
"dataPath": "params_shard_268.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.mlp.gate_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5ce24bc64a32d89be5a66db99299d1ca"
},
{
"dataPath": "params_shard_269.bin",
"format": "raw-shard",
"nbytes": 22020096,
"records": [
{
"name": "model.layers.38.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.38.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
}
],
"md5sum": "7ccebedeebcbc25f099478a886939fc5"
},
{
"dataPath": "params_shard_270.bin",
"format": "raw-shard",
"nbytes": 41943040,
"records": [
{
"name": "model.layers.39.self_attn.qkv_proj.q_weight",
"shape": [
10240,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 41943040,
"byteOffset": 0
}
],
"md5sum": "9bfec26212f6153695a7bf94b7ed8901"
},
{
"dataPath": "params_shard_271.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.39.self_attn.out_proj.q_weight",
"shape": [
8192,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "43c4a501ae5c0677f3777f18969abf1d"
},
{
"dataPath": "params_shard_272.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
8192,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "368521515108205d26c5353b9cfd6b5b"
},
{
"dataPath": "params_shard_273.bin",
"format": "raw-shard",
"nbytes": 22036480,
"records": [
{
"name": "model.layers.39.mlp.gate_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.qkv_proj.q_scale",
"shape": [
10240,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5242880,
"byteOffset": 12582912
},
{
"name": "model.layers.39.self_attn.out_proj.q_scale",
"shape": [
8192,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4194304,
"byteOffset": 17825792
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 22020096
}
],
"md5sum": "472cefa815c9075f98e8eddcbbaef27d"
},
{
"dataPath": "params_shard_274.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.mlp.up_proj.q_weight",
"shape": [
24576,
1024
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e09c7ddf26c54324ae72d9484eb8b736"
},
{
"dataPath": "params_shard_275.bin",
"format": "raw-shard",
"nbytes": 25182208,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
8192,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.39.mlp.up_proj.q_scale",
"shape": [
24576,
256
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 12582912
},
{
"name": "model.norm.weight",
"shape": [
8192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 16384,
"byteOffset": 25165824
}
],
"md5sum": "5f0b7ad0570878b150bf0d46ac6a1a03"
}
]
}