Llama-3-8B-Instruct-fp8-MLC / ndarray-cache.json
ruihanglai's picture
initial commit
a8d340a
{
"metadata": {
"ParamSize": 323,
"ParamBytes": 9081201152.0,
"BitsPerParam": 9.046979540559027
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "lm_head.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "acaaa0d1fae40667cf6951581c648544"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "94df6ed333fde0dbb6bfe1691fa17364"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 1050673152,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
128256,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1050673152,
"byteOffset": 0
}
],
"md5sum": "d75c672caa231e7f0c33b49ea425706d"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "17b6e3bfa3634f6b47b27539827ad1f0"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c4b1ff2c673ea184f480b2dc447abae0"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "10fd90c0ebd57bfb5d7d847ee4a6db8d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "50709d56bc47243fbcd5090d6267b07e"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "8aa978e2392da213f8d27fbc9f7101da"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "39378fab4003dce8ff61fbd70738aee3"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "fdeefe6cb5990c66a1f53735bc88a76d"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6dbc5e5901c7c65535d3d8424f4ba1bb"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4605bc9bc9364ce3811afed7f6279eb3"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b97bb4dcf8cae47f2f81f11961fd7565"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "0c27aa6b784847cdb995dbb48147434c"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e359be74be6731aace4f255bffaf2c06"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "4ff3b33d638fbc789de35b828f4ddbf2"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "def84fd83079140cb7cfb4178ac67e3a"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "6009eff5669ec400f2793c72c32e39ae"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8f4114f7bbc7a21b20ca674425d31805"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d13928ed0282bb2c68869385d3b8f9ce"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8842c98abab621b9d53e2f9261b75159"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "16d74ec0a9783f83137f6db22ed672e9"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dced9d7894a234aa55f87c31ec2c8793"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c03b2378a72cb8fc55c12d17e541d4a6"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a3afdf0bfa8df0491290af52ff08728d"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "3cd9b7213db2729501e464fcdacb81b6"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "25683c4bc4e6918161d2e7df8c6617f3"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "8efc97ddb7f4f7ed5b49bb4355144099"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c8cb15b304fbbb55435f4199bcd2e6be"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "174a2d7bd81ce377abd745610758a40d"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "323393cc19aba26aa4f874c5036749e6"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bac572ef0647d05de2f0c352ee2a2252"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "14dc3bb8807eb413d26d26a164873659"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "c0b133fb38ecb4b505f8e794ea593e0b"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a5f8da5b6aa404ddf262dee8bc16e05c"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "492e1f5059bd28e09165cb73a5ea26fd"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "799427e4f2bf3a5ff8fdb1982c98813e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "ea98d69ab93e3a4265f1a9d9fd163551"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "197e5366b70018351dde47410e6897e3"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "547b90a732909cc56e0f6edee7ac0fd0"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4a78e282ed8231e246042ccd3b4c44d4"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "be6e38136677eaee017ab7dc9397e37c"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c64123d97b4fea7365ca80a000a99fe7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "8cbe5b804e73567a88c6379e918afc07"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "10139de7cb709004131c62972ed1497c"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "c1c241cdb9cec7383d72af9545616d2a"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "144f3fb53bda5a56a18b037a1427e2cf"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7516d2b3712262001c91a03658dbc3d8"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5b2f56c4e1001fc49728f82f67835bc4"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "e79acf5186a0719067db77a3c8023503"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "163a59e5bb48207e4f7bdc4993e1e8d2"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "60d05afc14746a7d5ea35ce0e7976a31"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d2156d76356b97c7717c7c6641b1124e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "f2dad698d45ca57e0a93d4d0ee1904be"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7a701e68fbef8fea088e83d9d633eea4"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "be46fd38f5c60b11e9eaf4c1eda22b6a"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "9428dab85d3be82e84a8e39385e7ae45"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "72ee15ab6b5ab856a2868eeefca28315"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b7de47ed9081494e19ffb15edfbb5a74"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "0197d7c4c7d93fb9c049e42449438ede"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "83d10f67035eb94ebc0ebec56b19b34a"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "3fe4467ea934ff721192aecc14a12538"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b48382d2f5c78c57b793fac6e87eefd2"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "c7004a8665ca642bb4eccea073fcaf31"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8696079905152fdea4dc7ede254973c5"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "1273b50c0e7430d7b57b26ef0972e9d9"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fa9d531aea0285d92582a5a686dd974c"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "8be6f905d75a5cbccae7030a234fef5e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "55bf30c710c8075ec9ebcfdc865fa568"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "f0cc21639e48dab64b340ff8d6be086f"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7aa6ae88f116fece2b144adf21e05d4c"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "9129e46b6a3f3545dfff1f12760bb4ee"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c26a24b975ffc3678f38e087a1275a04"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "9c22b9cc8b8456dc83180dcf42a1d0bc"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "86505a742e756c9db06f8ddad723ad96"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "7001b55d8181bf846bfe3185fe86abfa"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8cad816b876c08014628ff8b0d91d959"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "19c2a2c06095911afed82d19f8454f08"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "ace475b17186bf0f6d6c7c179654d584"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "eba653be0a86ee4eaf84559e75fe40b1"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "2a645b080fc3a01571695c271ec272a4"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "eb84aca9807058856778feaf6e6c8614"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "3fea6b4c4b13e64e1c155510599a0e82"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ea173e06c101300ebd9bca2c70886b33"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "33eb76fb9d0cfe48496b1e19721cd29e"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ba71265b9196ba2d9246390ab344dc1b"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1321d0a702a5894d43236004af24b10e"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "20649695c42e8bf29bcf0df1a47437ea"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "aa557c2ed757b17da3c71110de9f2092"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "4ba9f34ae8dd49c31f7db7e01581b93b"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "62b635038eb228f798297c62f57d0572"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "72536650ba082f1f8e352d8c47973acb"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a05802f431c544a732f71c582fc3c402"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "dd778e27ba0787e8c5db9af59d5bed6a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "975f90591d6c62ea460603ea7305fcc2"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "bab15d300d668109ce7c594a7591c059"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d7ebcfcfc4e6b65d72ebff9044a8a8f3"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "a8f2353231649fd8066643611ced34ed"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6f8d4eaf597930e6ca5f427d2af9f4d8"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "cd6e53cd711a6ad80d4e1841a746da97"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6b919db4361c051483f16964995a9f51"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "7c920bca94c8e8c8b2345dac59094d91"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d96c6a96bd7a219806378b4a4bf6c6e7"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "d572dfc533fe7b0b4f56b4ce8568061c"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ef1c4e3807d04053d86520236f16a3f6"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "c2d0d27ca178dae4fe5058bc3b3939a6"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2f445b381730d4e6fdbb6fba2241060c"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "245a915a3a85de9e89c2a1f4bad4bc96"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d91394a730af2b55df3ee6200fc6d636"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "64dbbca1f1cf51aeadd82ffe0670060b"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "eda010d7943a8d5c0cf1288cb2cdf9eb"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "14a0f7c3b6763130034e3f0f2e5f4549"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b6561be2ef05c17ea6016af806347a11"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "03321b27dfa2a035b15bdad271c3c6f4"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c028e4698d34c1be7163460f03b30509"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "02ae1beea7486d631cbc378d521e8f2c"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e64d3ab521a092fe0fa37be649596da2"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "5b477f1efde3ace2c3177b969f4e6470"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "802ea14dd8e010e163f5edeafca32141"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "8b6a038d933d1cd0c3c437a3da63c889"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6e93e3a6a6d402564552597008ae38c0"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "8f8ff2f3dcdee181c5c47271b8657ec0"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c346b823b610747743bcf583a5c46863"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "22eccb78a3ff232a096ae910246067d9"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d7cd9ab067b247c7cadf02f4b95e6f00"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "dccf8e2f3a7b7c41f4cce1290552be06"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 117440512,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 117440512,
"byteOffset": 0
}
],
"md5sum": "5bf2277e3399ef33710b832c49ad8ca2"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a27ae0827ec4a3389a190aec8553ed7f"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 16777216,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 16777216,
"byteOffset": 0
}
],
"md5sum": "cd39adb0a6389bc6e9a772a1740357e4"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 25698560,
"records": [
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 8192
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8194
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16386
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24578
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 32770
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 32772
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32774
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
4096
],
"dtype": "e4m3_float8",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 40966
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25206790
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25206792
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25206794
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25214986
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25214988
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25214990
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25223182
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25223184
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25223186
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25231378
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25231380
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25231382
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25239574
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25239576
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25239578
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25247770
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25247772
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25247774
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25255966
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25255968
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25255970
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25264162
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25264164
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25264166
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25272358
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25272360
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25272362
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25280554
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25280556
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25280558
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25288750
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25288752
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25288754
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25296946
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25296948
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25296950
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25305142
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25305144
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25305146
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25313338
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25313340
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25313342
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25321534
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25321536
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25321538
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25329730
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25329732
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25329734
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25337926
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25337928
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25337930
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25346122
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25346124
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25346126
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25354318
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25354320
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25354322
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25362514
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25362516
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25362518
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25370710
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25370712
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25370714
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25378906
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25378908
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25378910
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25387102
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25387104
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25387106
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25395298
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25395300
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25395302
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25403494
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25403496
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25403498
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25411690
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25411692
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25411694
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25419886
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25419888
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25419890
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25428082
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25428084
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25428086
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25436278
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25436280
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25436282
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25444474
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25444476
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25444478
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25452670
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25452672
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25452674
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25460866
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25460868
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25460870
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25469062
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25469064
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25469066
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25477258
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25477260
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25477262
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25485454
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25485456
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25485458
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25493650
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25493652
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25493654
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25501846
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25501848
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25501850
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25501852
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25501854
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25501856
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25510048
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25510050
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25510052
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25518244
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25518246
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25518248
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25526440
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25526442
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25534634
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25542826
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25542828
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25542830
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25551022
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25551024
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25551026
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25559218
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25559220
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25559222
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25567414
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25567416
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25567418
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25575610
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25575612
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25575614
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25583806
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25583808
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25583810
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25592002
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25592004
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25592006
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25600198
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25600200
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25600202
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25608394
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25608396
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25608398
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25616590
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25616592
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25616594
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25624786
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25624788
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25624790
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25632982
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25632984
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25632986
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25641178
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25641180
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25641182
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25649374
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25649376
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25649378
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25657570
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25657572
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25657574
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25665766
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25665768
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25665770
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25673962
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25673964
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25673966
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25682158
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25682160
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25682162
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25690354
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25690356
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25690358
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25698550
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25698552
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25698554
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25698556
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 25698558
}
],
"md5sum": "92e36c254dd2d2bc7158d731c8413f02"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 256,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 2
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 4
},
{
"name": "model.layers.0.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 6
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 8
},
{
"name": "model.layers.1.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 10
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 12
},
{
"name": "model.layers.1.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 14
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 16
},
{
"name": "model.layers.2.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 18
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 20
},
{
"name": "model.layers.2.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 22
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 24
},
{
"name": "model.layers.3.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 26
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 28
},
{
"name": "model.layers.3.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 30
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 32
},
{
"name": "model.layers.4.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 34
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 36
},
{
"name": "model.layers.4.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 38
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 40
},
{
"name": "model.layers.5.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 42
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 44
},
{
"name": "model.layers.5.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 46
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 48
},
{
"name": "model.layers.6.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 50
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 52
},
{
"name": "model.layers.6.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 54
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 56
},
{
"name": "model.layers.7.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 58
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 60
},
{
"name": "model.layers.7.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 62
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 64
},
{
"name": "model.layers.8.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 66
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 68
},
{
"name": "model.layers.8.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 70
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 72
},
{
"name": "model.layers.9.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 74
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 76
},
{
"name": "model.layers.9.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 78
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 80
},
{
"name": "model.layers.10.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 82
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 84
},
{
"name": "model.layers.10.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 86
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 88
},
{
"name": "model.layers.11.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 90
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 92
},
{
"name": "model.layers.11.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 94
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 96
},
{
"name": "model.layers.12.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 98
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 100
},
{
"name": "model.layers.12.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 102
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 104
},
{
"name": "model.layers.13.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 106
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 108
},
{
"name": "model.layers.13.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 110
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 112
},
{
"name": "model.layers.14.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 114
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 116
},
{
"name": "model.layers.14.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 118
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 120
},
{
"name": "model.layers.15.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 122
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 124
},
{
"name": "model.layers.15.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 126
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 128
},
{
"name": "model.layers.16.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 130
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 132
},
{
"name": "model.layers.16.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 134
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 136
},
{
"name": "model.layers.17.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 138
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 140
},
{
"name": "model.layers.17.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 142
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 144
},
{
"name": "model.layers.18.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 146
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 148
},
{
"name": "model.layers.18.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 150
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 152
},
{
"name": "model.layers.19.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 154
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 156
},
{
"name": "model.layers.19.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 158
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 160
},
{
"name": "model.layers.20.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 162
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 164
},
{
"name": "model.layers.20.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 166
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 168
},
{
"name": "model.layers.21.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 170
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 172
},
{
"name": "model.layers.21.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 174
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 176
},
{
"name": "model.layers.22.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 178
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 180
},
{
"name": "model.layers.22.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 182
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 184
},
{
"name": "model.layers.23.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 186
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 188
},
{
"name": "model.layers.23.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 190
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 192
},
{
"name": "model.layers.24.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 194
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 196
},
{
"name": "model.layers.24.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 198
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 200
},
{
"name": "model.layers.25.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 202
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 204
},
{
"name": "model.layers.25.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 206
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 208
},
{
"name": "model.layers.26.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 210
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 212
},
{
"name": "model.layers.26.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 214
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 216
},
{
"name": "model.layers.27.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 218
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 220
},
{
"name": "model.layers.27.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 222
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 224
},
{
"name": "model.layers.28.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 226
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 228
},
{
"name": "model.layers.28.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 230
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 232
},
{
"name": "model.layers.29.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 234
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 236
},
{
"name": "model.layers.29.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 238
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 240
},
{
"name": "model.layers.30.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 242
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 244
},
{
"name": "model.layers.30.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 246
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 248
},
{
"name": "model.layers.31.self_attn.o_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 250
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 252
},
{
"name": "model.layers.31.mlp.down_proj.q_calibration_scale",
"shape": [
1
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2,
"byteOffset": 254
}
],
"md5sum": "163928bdc819c2c30d9959aa4f9adde7"
}
]
}