{ "metadata": { "ParamSize": 803, "ParamBytes": 72656372992.0, "BitsPerParam": 7.974819159215532 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 2101346304, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2101346304, "byteOffset": 0 } ], "md5sum": "b450ad6564dd69ce196a7a66a8c66182" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 2101346304, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2101346304, "byteOffset": 0 } ], "md5sum": "309be16492736a951c706ce5cd988ca1" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "797cfd37229f030c162a31ca1cb8b0dc" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "bef7c44ed2d080ed4358eb51385c27fd" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1ef2429046a53c9d399a2917496c3644" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2ab2d0a67816ef1836379327fcc519a2" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "fec9a3ec6955cd3d94d77adb87677686" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b62c6f93216e9a37181a7bb237864e2c" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6f090a4991ed582ef9a71e2c0d1e566d" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "940457edfe2a861c434c6c50ac2f66a9" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "598de1fab03ef0a9ebadb73a6104dde9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c0df5d3e67a6ffe2c7988c09b088d320" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "0fd8bcca57bee283ecdac25b12b29731" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "920b40904a5ced8e25afe586a06a2c89" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7fd84684a5700f544e270a325aaa84a7" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e23177b40fc2425da1dd50fe828be6a9" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ce17ffc4528b50e274af94a2ff6f8538" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f64ef48625d1e104a0fb985c69c0182d" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "7d8b7850356018aed9df334a9da05acd" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "45d220179063b33c173792305eca6ae3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7a6432de03b238e673b1b0cbac40973c" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "11f9b80d8d322938d2a22aba30baf424" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5f8b664b7aa863e9ef3e620e272a4032" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "f74d8c58abc34a02b2917f3f97224872" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ffa0ef958981d7a121d2319271819fac" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "af0a352a7c4402da245352062e91d1cd" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ce08b2c98b6cb7a110aa093a34421fb3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4741d3ad7694df52870a859633ea620c" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7de99ba3f772c551c22fc79978029341" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1b0c6aaa943ea8d8cf7ac4b2effea528" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0bd1381bf025a121130b03bdc8f7bed0" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b53ab505054502b207db81198f211666" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9b0db11af63270291ec27e8505e13d6f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5c7d1f1136b14788da47bf629ecc6e2e" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "543bb746335ccf7491619ebe2e7f1c3e" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5c43136ed1e71843e031889dcfbad104" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "94a5b8ac8e3828aabffefbc4a0745757" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "c1ae32f04e71c50b56bbf54598e46248" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "0b6380399d035b09e712f7ca0dabf74a" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0bab33f53a5d3dc3053fd723ce8f59dc" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7c7e8073c1b12462c373b2fb4511ffe1" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "3cf44f7f71d81e22c4fa93cd27bca168" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "cd1d0121161be1105ec6056138e6de9f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d55bba6b4623d75aff034fbe29977c93" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1e5b67d012cf5bec8f21263b94132823" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "16c579770bdc7203fb1c15a30be5e747" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "30631b8f70ebd2eab457db5eb3a1d61d" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d69087ad3d2b43fd142e9afc5e17086d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7d533331c7e2dd0c7feb32d9d1f718c1" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "68ba3e28dadc0f281e40d8f915925c7f" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5ca627fa46d02b7099549f4da40d7a9f" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "35d2aa7e583d7e8ed31b103d092d68d2" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "85083020920b23a76c10ed04f380a807" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5bdff2f392f0804b5059ee97798fb9e6" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe8758ae9bd2a93b207ff3ee723a1111" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "20d29e0d44ab4b4a94229300b8537b87" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ce9da9d0ae00a3e730b1edf5e913b850" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "da93cb0b7f4cf9fa895ada3cfd706131" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "19049bd95baa50d73b9040e241ff8e2c" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "23ab931b4261417d39ce0da2fada27b9" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a7cb50d94b5543893486ab09ce3a4607" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2584050d9066d45faca22797e136f030" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "9916dccb3ba835fbc6da5596fbfa3835" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e39bd62dd91ecc9b4d71802246a8f06c" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a083a5ae6b04b1f3f0429bcf8701339a" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3478326a1ff16259fe9ac3ae9bf83f7e" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7a160e43b5455522a5b383d28c3f8068" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e7e3053f6ebd3c133ca96fd7e38c6f91" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "442a7bf7223d1aaa97c37a9103671b84" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f19ce5ae0a7990123d41f2b8a47e3cf8" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "9f4625563a90399deb69b5e7f7b19ed1" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c2ea24c720201b4329f137caad792ce9" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "03ad1e19d8380394696118780176e574" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8d2785de78aaa291da576d3ffb15ef5b" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "073c0efd3a39470d2cfee04381891409" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "a5319c5baa2ef0e5039c53fb6adb1d09" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0bf4630d716d5cece7bcfa606a9eba79" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "29847eff9e993ce7cfe1e42da955cd1e" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a4a60d4c10309c9d76b2d82bd346e96c" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2b74d6fed1536da694081b7108452a40" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8f63e4bc22bf5980e85ee44eaa296dda" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "372d993a6d094d6d2054a48106c96f24" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a7d86e2f9f19ec285e72a15f0e554ef0" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "911bec90f66ada4405e9998de55d3901" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8b633cefa0b7c3794a0757ca719804f3" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "378b1ca4d5e846546cb130f7ff0a2420" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "967a75a9fddd48bbddd54e40ec1ffb3c" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c4ce3288df351f184495461644a00b59" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3bd19456fa54b12519c367312baa7fba" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4bb9e3b8d19568589d4663cc05ebc1a7" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "fe2206acb00a46c7868bf7cac6c27093" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "68b26a27560ead857d460b4159517fc1" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "56900a10ff7e722af7a70218dfcd4499" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3846c8c940a3ae41e3057f98c2635381" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c966fdd6d5786ec9f8a3b99b726f379b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "16e603175c392bf32f4978e7e598a4aa" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6bf6a369663a91dc6f6a6d8a254cbdb0" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ac70ac93669ff848a0ff7018380fd72b" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "1c1f6e0c25ed4aeea1db2e907a87fb51" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "860731352abd5f81f505152a059739cd" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ed755d14b493a75f7bf6f28d29c30077" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "78da8af090aa97327d413a820327b175" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a06a9cc40d997487fbdfc6999a9bc22d" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4e8a871d7d35f44fe8effe57667cd950" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b036939008d8c9b52c4142b65f4b86af" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e6699abb579e38549de1f2924e74f9eb" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "9e552efcae3b2464a21d08b1bf36d340" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8a1c0529478df4f3584e652a68fb3f61" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b428e6b8309838d12cfe50b64ec2d6c9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "faabf864cf5a7549b3a59dd41783ebe3" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "86379f769dff50378df13638f55c6e30" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9a8a05339b752995403f0abdf26f9e91" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5b32c666bfa36e161921eb76bce039d5" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2bd9a88fa11bf8f1a2f8e906736cd0f3" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "3d8ea10ed5c3141baedacbe071b36ae7" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "935404740012cae8cd2b7e25eb12dbf2" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f8021b7ef1516cdb24ccf6e02df223e8" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4c813ac501f8ae9c2deffb8bf46f18e0" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "89acaa356bf9e4243860fe4cb84c0c47" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "63910765915d469ac20be486310d0c92" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4568fbc2c56ba9e1638eb3e1b651eefd" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4cb741a6e225d8b2bbd653c5bc075078" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1044682dbfe15115f78ad4f354d71905" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7f39ce6ca289ac6ac6adb6a153cd66a9" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a1ce7dfea3a1a6c9b19388ddc2a07545" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "eff5c99d9481a365806c23c40f9bb598" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "fb5228e36bbbc7030fed74844ee4ff2c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b0500301e26f4352a3562091dd80485b" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c510648f77883c1974e91a4849c1bc58" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "320e2de7c8eb870a473f8243e8be1ca8" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "198b9fcc168b813c513d8ae5b559a5b1" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "058416cf1440072de887057911ad1340" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "54e41ed6c4fda43508d43fd4c08c44ad" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "612c05bd0626b92454f06e9a19f8bd7c" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b05ea46fd83c8c61e39edc62c9df0874" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b1c374bea953bfdb3b107ae15c3c6453" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f4c0d59bf7ac1e45d7f0492a74f8221b" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1b8e06371ab577b847e03ef7e495eb47" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f83b078d95b192e74841f2d85ffebc77" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ce5203ba040e20f74f392574f6ec2c27" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2b5c724c1787dce3bc5b40b657a9f0c3" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "0acd132c00f93037e84cacaa0c901db2" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5d083551fd1154a4eaec730b4f669ae7" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3de10e670a8d92268d55a95eb4b39cec" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9f1f917e4ccb64d6d314aa0e717a6de9" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "51bc54e46c05f26fe97d5d64aec88115" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a312eb11689ee4a6af9e8a6205323948" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "2be714f964f3d01032ab85611bab16d8" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "db5470409b80507b402f77b30d981522" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4d209c240ffcdd32a4c85971f955a3e0" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "39319df4503463297554203260385952" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "9e1ba5f4410d11707bfc2350256fbc2c" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "eb3fde29c768decfa59683b10bc34cca" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "68179c2bf36f76dc5a5bf9dd3ee99802" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ced073a5194fa57d07cc4a2995c494ef" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5a9e87858f202b5568e8fef9a5f032b2" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "688c514c3e735e7d4efc7ee2df3d5a8c" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "ab9d00ae6acd35d586802ac94912005b" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "05bd4d6d964f0abb383c8f061db2ef90" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ce63beb60f1dfd8371c696bf3d5b8b3d" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7b3c24455e8e344e545234353bbb73cd" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "98db56c72f28bd692bafc43550f84fc1" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "bea937c98812d62afdb1a82444d8d1e8" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "6afaa589d657620509282d2d6659fd92" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4595e8d02a7b79367fa898152434b894" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5bbfb5340bbfc5d6ac08b02fbac35521" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fffc8202c774831dbba55b14960744b3" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ec36939eb51dd07b5eaf4603338299e5" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "73524b4f19dfcf6b6f2140428a725563" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "9cbb0b020fe1301effcb230a81d2093e" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "8564ba637ef6eb63671646b13f4ecbcf" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f90f1bf74663bec68a48a5047d13be2c" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "0cf5103dddb071c5fb81b10c525f41d1" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "701d5ed4d7a22cc994759eaa552e3a6f" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "df28c85499d563c541d4d5d4825de22e" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "880de919a371642c18a2b7571336dc74" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "336059f899161f73cf889965523e54cf" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ba901a961e071a8fcceeeda99236e138" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "322ee9b5e373adad5e453cd1b25a36eb" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0eb7d4efd04b1a51ae01238f4f71607c" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9a1d9d3e3d82bbe642b4103f001fc4bf" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f70ab1d7ca16b65650c15c0d8a51db39" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "97dd4670c72a55f39e79cb9bd02c192b" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c71936745fac05f7e32b697453ab3321" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3a4d353d3a8776386dc7ed78058b399b" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5aeaaab66ecce4ce775004a4dbd5824a" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "409b178b713de43f236a4e15d59a8498" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "124bc4a69a58d0adfdca10cd08489900" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5c8e2158a2e72e6e5e5c5b60332dd3da" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1c23a392b40679fb955febca06c506ce" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "66f5485d89f67710688aa42619366dd8" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "849a69d3105c6297ae888ca24b5ef758" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2053ef3ce843d79bf73e82c1795e115f" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "28083fd9026e262d993fe9a86b165682" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "24262643adea36c24108b34544bb27c3" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "289f30e51ad56daab06792e6b7b11d39" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "4ab0f4516a273d0aaafec0ee29229681" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "71401bfbe3721db7870f8722edeb1751" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d61719aa66adc2ca042a9debc2474851" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3e6cf21f546fe81b66d152c65ce25cee" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3149070358e4f695de1587e0b40b26cd" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "7550f2936b9fadfe5b5dcae971d1e9e2" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c9dad67f8312f209af66faf384e337c5" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "65a85b6688ca9c45e6b5b78fa49fc8b4" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "e85655b1a4b62640054169601d79dc13" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "56cc3a0f9128b2184b290110ed195ad3" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "315a1f00462d3096ca318c604f1908c9" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "665c6a6e56cb342b27d5c467c0a54218" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "bc3d4f5451fb629377d2e3a1eb6c68a9" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4c6af06bc8b2dfb14470a7a0f06f7466" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d69aead6b77d7efd5ad054a9ceda9980" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "efc1038f95a7c91637b6a495d56501c2" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "d0bfa6a0d0e11ed613a3929ae03f2c4c" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "27080a2884f1b466110800e99e090cff" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "dbeede2c27447137e9f3ad0a93fd2cb2" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "02740f2c04da12951504adff6c0b7fca" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "26e57944ac7b2d3f9b5f87ee7106cd1a" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "3d1b0525a0b46e993b8c8f519c616b32" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "3359361aa99ef0f466f2fe00b7491744" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "367ad87ce6e00da1efa31a0560c493b3" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "099544a5fcea86c8ed7201d93e251561" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "82c66372dde3e01598be4922e702a4b5" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "893fa4313736a16208e34e88dd7f9676" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "1fe83c81c8e51fab0c0a2de35edfce2d" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "7d477bf098cd0ebd9c10061c86429a60" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d1bcb4069def2c871aba3723e894e894" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2bdb3f83aa4684de8ff3c32e769c721d" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "241c2fb0eb8e2dfa485fb8cf9306c992" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ba272ba71b65054c23b5be18204a4c01" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d9b1467a79a1610ad87943e55df47268" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "0640462d484442ad40c10cb5761e57d2" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "1c34540f85020970d2d3dd8e9d00f6da" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "200855e757c63ad5ff5173d676a12c37" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ee911906a686fa8f76aed4ef02ead1a4" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "07cc314f09f934c3143ee09fef1bd698" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d3f4deff995747a359a515bd422f490a" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "1282748b15f861be4ee9b1067af5ab1b" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "d624b9ae120437c1a0a2610394f55fa2" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ee1d7c992b179dcb06a017899e89410f" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8fa52fc95d4c6e0768a6e977e84daba0" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "fdb4e5e56356bfd208ec1803d2a95041" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "767cd360012e74c64e1e8c69f0f9c9b8" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e0b18e4485fb443137bf6bdea536e32f" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "22831efe05d52f802fec2b2b6a763894" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "29e9ecdee83fc0d9146c85860e79be15" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "03a28d88e967b1c5b24da5535161212d" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "4c4e4a648fa391a984ccb13da30db21a" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6ab53c91e25e2a6b037a19398b16cadc" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "daa553383212ba2648755d7ca97c4d3f" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "5409cf7efb4afd58aaaba04d774359f5" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9b525878fd653748c2cb874c5ff9f38d" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ab5ea1b8b29cac9c05cde25c13c492a9" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "2c43d9bb82aad7b7ceacc3d8146e4e31" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "8127b864a905eccf0b26bd138e2a61d5" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e55e23016bbb73a4f8f729c4f468a718" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8527d263da170977035324286b1fa62c" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a50f9422dd4e822543bfaebf15705fa0" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "621b380c2f32b0237b810e70b65d6dff" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "d361f3ebdfec0883d91d1a2968e34f3f" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8435c418221c476904fe163e504da299" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "ef98eb3490660e41e98d284ab102fa23" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a3f17268626d1a2c8a92f529c99ec68e" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9446fbe06a354c96dc26e4d8694603f5" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "f160899c73a2b7eb19234311bc5a0cab" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "8c12af27b49f7eece96d12982995f0bd" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7c702a9e8dd72734ba07a0166d363e51" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c9e726a1d0b27561b762735568d8c1e1" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "50f9ef198c28e9eb6d0c272610938e16" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9f6bd10bc82295ea9766f302b2b1c4b8" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "f2dcf8116e9e69e24d0e509a934351f8" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "deaed9960ddfa6ed4b140b773e62445a" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "94c0f3051e47f0d76c18cbc6ae0434ee" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "43a627c7ac175a04219e913ffacbd5b3" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "15a7a4a4d52d2ea5784c973e5151c48e" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "2147fdc9c5929e40efa304358b673e24" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "a9a6459b5e520c55f239fc94d058382d" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "6567fee89338570d687615f0c8452dbe" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "5e323cfb822efe63ac8822524581fa69" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "96c2fa3b1b3efdfa70df57f0b0676acf" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "8e777f93fbb9fcca78d94296d9206654" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "b51903bf228bbae2fa097ebbb7083f59" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d405efb5211fa003422cc46bcefe540c" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "36ba0e3289795904659ba6aad54678c9" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "b83682cc2651fcc57de113bb88a48d5f" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "44eb062ed650153bae9febce3f4993f2" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a1ccb8bb2f58ab6ba4db2b1dd0a58871" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "127e2582e21da84059ebc2c909bb8d1d" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "e7bbedb4258c1745340f2a86f98db514" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "eca3173dd5fda2e87a524a20b6fc414d" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6205f103eee621b3d09d706b262de16a" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bcb878d0513ae913c0095a2a285ccc31" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "5f2827e0688fcbff8012d2947af7d690" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "21e6a4c9af805fa55c2aab3fda61310f" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "e79d263a5560df151d9ae5bf2bb8fbbb" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bb53d44f5b932081354614705de77822" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "220e0d9c014072514b076eed771443d9" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "bbeef1d9424bcd46f5ceaa499dc492f2" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "7a692adb9c99e68ee4f05686b50ea1c7" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "c2d2110a27d43bf441fa18569d2019e8" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "172a5857f83843f277470fc5338cbf25" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "505cc786c17f094efeb6528c8a468bb3" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "d8d762a07a9cc608d08d9a5be00779d6" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c62915c887ef5ebd065866e65e640578" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "3834e98888e3b8d3115cdd916fe225b0" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "a2536778e7d8162770e0a49bed2b5ea9" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9895c390535a00ee319cd6cbcf78202b" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e04d05daa4a1d7dff9c72505ec5e5033" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "413bc17ca708b1c200c0c2f9957fca09" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "86160550862efa5059f1b7eeff3380f0" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "c2d4633ef6cb4fa0a129d4cd00c0156f" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "e0f0859077802767614438ace4923b82" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "870e199308d70e4a927e264fa4a45aed" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "69811870795b8d03e7030bf96b8c3e73" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "9a11f655d8421fa7bf58ef7129ee4f2f" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9665b6140d2eebf4486f8dbdd8cc21e1" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "dbe92c56650a012ba709e44a6aed1b54" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "4b867ad770b07e0b9a66f0ac719cc900" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "ccd45c12cd6c586db88c021d3913ee3e" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 28672 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6b335412c60f845071bfad95a2865f4d" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 469762048, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 469762048, "byteOffset": 0 } ], "md5sum": "9eeffae3c6160a0bb9c16d82740505d4" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 83886080, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 83886080, "byteOffset": 0 } ], "md5sum": "69a6463f2da2b1fb99a5b6d88031ed7f" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 67108864, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 8192 ], "dtype": "e4m3_float8", "format": "f32-to-bf16", "nbytes": 67108864, "byteOffset": 0 } ], "md5sum": "6f0968bf476cb3778e8ff35a0f4ae217" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 2638464, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 16384 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 16386 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 16388 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32772 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32774 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32776 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32778 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32780 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 32782 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 49166 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 49168 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 65552 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 81936 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 81938 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 81940 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 98324 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 98326 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 98328 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 114712 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 114714 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 114716 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 131100 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 131102 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 131104 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 131106 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 131108 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 147492 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 147494 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 147496 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 163880 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 163882 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 163884 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 180268 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 180270 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 180272 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196656 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196658 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196660 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196662 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196664 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 196666 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 213050 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 213052 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 229436 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 245820 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 245822 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 245824 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 262208 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 262210 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 262212 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 278596 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 278598 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 278600 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 294984 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 294986 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 294988 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 294990 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 294992 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 294994 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 311378 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 311380 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 327764 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 344148 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 344150 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 344152 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 360536 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 360538 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 360540 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 376924 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 376926 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 376928 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 393312 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 393314 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 393316 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 393318 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 393320 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 409704 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 409706 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 409708 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 426092 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 442476 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 442478 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 442480 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 458864 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 458866 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 458868 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 475252 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 475254 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 475256 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 491640 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 491642 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 491644 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 491646 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 508030 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 508032 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 508034 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 524418 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 524420 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 540804 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 540806 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 540808 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 557192 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 557194 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 557196 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 573580 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 573582 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 573584 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 589968 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 589970 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 589972 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 606356 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 606358 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 606360 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 622744 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 622746 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 622748 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 639132 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 639134 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 639136 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 655520 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 655522 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 655524 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 655526 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 655528 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 655530 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 671914 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 671916 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 688300 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 704684 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 704686 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 704688 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 721072 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 721074 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 721076 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 737460 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 737462 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 737464 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 753848 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 753850 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 753852 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 753854 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 753856 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 753858 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 770242 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 770244 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 786628 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 803012 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 803014 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 803016 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 819400 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 819402 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 819404 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 835788 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 835790 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 835792 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 852176 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 852178 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 852180 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 852182 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 852184 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 868568 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 868570 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 868572 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 884956 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 901340 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 901342 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 901344 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 917728 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 917730 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 917732 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 934116 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 934118 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 934120 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 950504 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 950506 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 950508 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 950510 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 966894 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 966896 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 966898 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 983282 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 983284 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 999668 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 999670 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 999672 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1016056 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1016058 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1016060 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1032444 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1032446 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1032448 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1048832 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1048834 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1048836 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1065220 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1065222 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1065224 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1081608 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1081610 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1081612 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1097996 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1097998 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1098000 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1114384 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1114386 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1114388 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1114390 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1114392 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1114394 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1130778 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1130780 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1130782 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1147166 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1163550 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1163552 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1163554 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1179938 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1179940 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1179942 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1196326 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1196328 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1196330 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1212714 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1212716 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1212718 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1212720 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1229104 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1229106 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1245490 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1261874 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1261876 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1261878 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1278262 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1278264 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1278266 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1294650 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1294652 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1294654 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1311038 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1311040 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1311042 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1311044 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1311046 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1311048 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1327432 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1327434 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1343818 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1360202 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1360204 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1360206 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1376590 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1376592 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1376594 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1392978 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1392980 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1392982 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1409366 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1409368 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1409370 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1409372 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1409374 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1425758 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1425760 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1425762 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1442146 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1458530 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1458532 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1458534 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1474918 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1474920 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1474922 }, { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1491306 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1491308 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1491310 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1507694 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1507696 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1507698 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1507700 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1524084 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1524086 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1524088 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1540472 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1540474 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1556858 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1556860 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1556862 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1573246 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1573248 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1573250 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1589634 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1589636 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1589638 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1606022 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1606024 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1606026 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1622410 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1622412 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1622414 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1638798 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1638800 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1638802 }, { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1655186 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1655188 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1655190 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1671574 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1671576 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1671578 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1671580 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1671582 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1671584 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1687968 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1687970 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1704354 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1720738 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1720740 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1720742 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1737126 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1737128 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1737130 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1753514 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1753516 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1753518 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1769902 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1769904 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1769906 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1769908 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1769910 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1769912 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1786296 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1786298 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1802682 }, { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1819066 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1819068 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1819070 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1835454 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1835456 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1835458 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1851842 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1851844 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1851846 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1868230 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1868232 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1868234 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1868236 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1868238 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1884622 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1884624 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1884626 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1901010 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1917394 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1917396 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1917398 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1933782 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1933784 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1933786 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1950170 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1950172 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1950174 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1966558 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1966560 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1966562 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1966564 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1982948 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1982950 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1982952 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 1999336 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 1999338 }, { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2015722 }, { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2015724 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2015726 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2032110 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2032112 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2032114 }, { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2048498 }, { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2048500 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2048502 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2064886 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2064888 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2064890 }, { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2081274 }, { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2081276 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2081278 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2097662 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2097664 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2097666 }, { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2114050 }, { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2114052 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2114054 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2130438 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2130440 }, { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2130442 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2130444 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2130446 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2130448 }, { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2146832 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2146834 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2163218 }, { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2179602 }, { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2179604 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2179606 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2195990 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2195992 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2195994 }, { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2212378 }, { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2212380 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2212382 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2228766 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2228768 }, { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2228770 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2228772 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2228774 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2228776 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2245160 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2245162 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2245164 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2261548 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2261550 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2277934 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2277936 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2277938 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2294322 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2294324 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2294326 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2310710 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2310712 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2310714 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2327098 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2327100 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2327102 }, { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2343486 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2343488 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2359872 }, { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2376256 }, { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2376258 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2376260 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2392644 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2392646 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2392648 }, { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2409032 }, { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2409034 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2409036 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2425420 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2425422 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2425424 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2425426 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2425428 }, { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2441812 }, { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2441814 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2441816 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2458200 }, { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2474584 }, { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2474586 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2474588 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2490972 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2490974 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2490976 }, { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2507360 }, { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2507362 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2507364 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2523748 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2523750 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2523752 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2523754 }, { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2540138 }, { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2540140 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2540142 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2556526 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2556528 }, { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2572912 }, { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2572914 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2572916 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2589300 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2589302 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2589304 }, { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2605688 }, { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2605690 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2605692 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2622076 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2622078 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 2622080 } ], "md5sum": "bc20ac32db07b3672c74b423b8bf7df4" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 640, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 2 }, { "name": "model.layers.0.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 4 }, { "name": "model.layers.0.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 6 }, { "name": "model.layers.1.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 8 }, { "name": "model.layers.1.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 10 }, { "name": "model.layers.1.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 12 }, { "name": "model.layers.1.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 14 }, { "name": "model.layers.2.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 16 }, { "name": "model.layers.2.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 18 }, { "name": "model.layers.2.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 20 }, { "name": "model.layers.2.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 22 }, { "name": "model.layers.3.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 24 }, { "name": "model.layers.3.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 26 }, { "name": "model.layers.3.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 28 }, { "name": "model.layers.3.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 30 }, { "name": "model.layers.4.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 32 }, { "name": "model.layers.4.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 34 }, { "name": "model.layers.4.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 36 }, { "name": "model.layers.4.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 38 }, { "name": "model.layers.5.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 40 }, { "name": "model.layers.5.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 42 }, { "name": "model.layers.5.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 44 }, { "name": "model.layers.5.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 46 }, { "name": "model.layers.6.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 48 }, { "name": "model.layers.6.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 50 }, { "name": "model.layers.6.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 52 }, { "name": "model.layers.6.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 54 }, { "name": "model.layers.7.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 56 }, { "name": "model.layers.7.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 58 }, { "name": "model.layers.7.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 60 }, { "name": "model.layers.7.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 62 }, { "name": "model.layers.8.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 64 }, { "name": "model.layers.8.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 66 }, { "name": "model.layers.8.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 68 }, { "name": "model.layers.8.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 70 }, { "name": "model.layers.9.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 72 }, { "name": "model.layers.9.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 74 }, { "name": "model.layers.9.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 76 }, { "name": "model.layers.9.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 78 }, { "name": "model.layers.10.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 80 }, { "name": "model.layers.10.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 82 }, { "name": "model.layers.10.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 84 }, { "name": "model.layers.10.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 86 }, { "name": "model.layers.11.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 88 }, { "name": "model.layers.11.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 90 }, { "name": "model.layers.11.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 92 }, { "name": "model.layers.11.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 94 }, { "name": "model.layers.12.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 96 }, { "name": "model.layers.12.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 98 }, { "name": "model.layers.12.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 100 }, { "name": "model.layers.12.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 102 }, { "name": "model.layers.13.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 104 }, { "name": "model.layers.13.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 106 }, { "name": "model.layers.13.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 108 }, { "name": "model.layers.13.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 110 }, { "name": "model.layers.14.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 112 }, { "name": "model.layers.14.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 114 }, { "name": "model.layers.14.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 116 }, { "name": "model.layers.14.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 118 }, { "name": "model.layers.15.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 120 }, { "name": "model.layers.15.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 122 }, { "name": "model.layers.15.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 124 }, { "name": "model.layers.15.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 126 }, { "name": "model.layers.16.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 128 }, { "name": "model.layers.16.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 130 }, { "name": "model.layers.16.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 132 }, { "name": "model.layers.16.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 134 }, { "name": "model.layers.17.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 136 }, { "name": "model.layers.17.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 138 }, { "name": "model.layers.17.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 140 }, { "name": "model.layers.17.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 142 }, { "name": "model.layers.18.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 144 }, { "name": "model.layers.18.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 146 }, { "name": "model.layers.18.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 148 }, { "name": "model.layers.18.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 150 }, { "name": "model.layers.19.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 152 }, { "name": "model.layers.19.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 154 }, { "name": "model.layers.19.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 156 }, { "name": "model.layers.19.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 158 }, { "name": "model.layers.20.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 160 }, { "name": "model.layers.20.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 162 }, { "name": "model.layers.20.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 164 }, { "name": "model.layers.20.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 166 }, { "name": "model.layers.21.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 168 }, { "name": "model.layers.21.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 170 }, { "name": "model.layers.21.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 172 }, { "name": "model.layers.21.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 174 }, { "name": "model.layers.22.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 176 }, { "name": "model.layers.22.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 178 }, { "name": "model.layers.22.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 180 }, { "name": "model.layers.22.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 182 }, { "name": "model.layers.23.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 184 }, { "name": "model.layers.23.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 186 }, { "name": "model.layers.23.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 188 }, { "name": "model.layers.23.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 190 }, { "name": "model.layers.24.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 192 }, { "name": "model.layers.24.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 194 }, { "name": "model.layers.24.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 196 }, { "name": "model.layers.24.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 198 }, { "name": "model.layers.25.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 200 }, { "name": "model.layers.25.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 202 }, { "name": "model.layers.25.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 204 }, { "name": "model.layers.25.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 206 }, { "name": "model.layers.26.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 208 }, { "name": "model.layers.26.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 210 }, { "name": "model.layers.26.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 212 }, { "name": "model.layers.26.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 214 }, { "name": "model.layers.27.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 216 }, { "name": "model.layers.27.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 218 }, { "name": "model.layers.27.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 220 }, { "name": "model.layers.27.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 222 }, { "name": "model.layers.28.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 224 }, { "name": "model.layers.28.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 226 }, { "name": "model.layers.28.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 228 }, { "name": "model.layers.28.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 230 }, { "name": "model.layers.29.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 232 }, { "name": "model.layers.29.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 234 }, { "name": "model.layers.29.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 236 }, { "name": "model.layers.29.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 238 }, { "name": "model.layers.30.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 240 }, { "name": "model.layers.30.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 242 }, { "name": "model.layers.30.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 244 }, { "name": "model.layers.30.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 246 }, { "name": "model.layers.31.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 248 }, { "name": "model.layers.31.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 250 }, { "name": "model.layers.31.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 252 }, { "name": "model.layers.31.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 254 }, { "name": "model.layers.32.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 256 }, { "name": "model.layers.32.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 258 }, { "name": "model.layers.32.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 260 }, { "name": "model.layers.32.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 262 }, { "name": "model.layers.33.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 264 }, { "name": "model.layers.33.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 266 }, { "name": "model.layers.33.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 268 }, { "name": "model.layers.33.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 270 }, { "name": "model.layers.34.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 272 }, { "name": "model.layers.34.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 274 }, { "name": "model.layers.34.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 276 }, { "name": "model.layers.34.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 278 }, { "name": "model.layers.35.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 280 }, { "name": "model.layers.35.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 282 }, { "name": "model.layers.35.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 284 }, { "name": "model.layers.35.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 286 }, { "name": "model.layers.36.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 288 }, { "name": "model.layers.36.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 290 }, { "name": "model.layers.36.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 292 }, { "name": "model.layers.36.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 294 }, { "name": "model.layers.37.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 296 }, { "name": "model.layers.37.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 298 }, { "name": "model.layers.37.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 300 }, { "name": "model.layers.37.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 302 }, { "name": "model.layers.38.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 304 }, { "name": "model.layers.38.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 306 }, { "name": "model.layers.38.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 308 }, { "name": "model.layers.38.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 310 }, { "name": "model.layers.39.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 312 }, { "name": "model.layers.39.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 314 }, { "name": "model.layers.39.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 316 }, { "name": "model.layers.39.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 318 }, { "name": "model.layers.40.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 320 }, { "name": "model.layers.40.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 322 }, { "name": "model.layers.40.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 324 }, { "name": "model.layers.40.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 326 }, { "name": "model.layers.41.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 328 }, { "name": "model.layers.41.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 330 }, { "name": "model.layers.41.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 332 }, { "name": "model.layers.41.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 334 }, { "name": "model.layers.42.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 336 }, { "name": "model.layers.42.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 338 }, { "name": "model.layers.42.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 340 }, { "name": "model.layers.42.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 342 }, { "name": "model.layers.43.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 344 }, { "name": "model.layers.43.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 346 }, { "name": "model.layers.43.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 348 }, { "name": "model.layers.43.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 350 }, { "name": "model.layers.44.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 352 }, { "name": "model.layers.44.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 354 }, { "name": "model.layers.44.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 356 }, { "name": "model.layers.44.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 358 }, { "name": "model.layers.45.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 360 }, { "name": "model.layers.45.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 362 }, { "name": "model.layers.45.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 364 }, { "name": "model.layers.45.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 366 }, { "name": "model.layers.46.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 368 }, { "name": "model.layers.46.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 370 }, { "name": "model.layers.46.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 372 }, { "name": "model.layers.46.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 374 }, { "name": "model.layers.47.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 376 }, { "name": "model.layers.47.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 378 }, { "name": "model.layers.47.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 380 }, { "name": "model.layers.47.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 382 }, { "name": "model.layers.48.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 384 }, { "name": "model.layers.48.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 386 }, { "name": "model.layers.48.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 388 }, { "name": "model.layers.48.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 390 }, { "name": "model.layers.49.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 392 }, { "name": "model.layers.49.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 394 }, { "name": "model.layers.49.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 396 }, { "name": "model.layers.49.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 398 }, { "name": "model.layers.50.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 400 }, { "name": "model.layers.50.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 402 }, { "name": "model.layers.50.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 404 }, { "name": "model.layers.50.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 406 }, { "name": "model.layers.51.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 408 }, { "name": "model.layers.51.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 410 }, { "name": "model.layers.51.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 412 }, { "name": "model.layers.51.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 414 }, { "name": "model.layers.52.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 416 }, { "name": "model.layers.52.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 418 }, { "name": "model.layers.52.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 420 }, { "name": "model.layers.52.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 422 }, { "name": "model.layers.53.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 424 }, { "name": "model.layers.53.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 426 }, { "name": "model.layers.53.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 428 }, { "name": "model.layers.53.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 430 }, { "name": "model.layers.54.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 432 }, { "name": "model.layers.54.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 434 }, { "name": "model.layers.54.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 436 }, { "name": "model.layers.54.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 438 }, { "name": "model.layers.55.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 440 }, { "name": "model.layers.55.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 442 }, { "name": "model.layers.55.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 444 }, { "name": "model.layers.55.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 446 }, { "name": "model.layers.56.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 448 }, { "name": "model.layers.56.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 450 }, { "name": "model.layers.56.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 452 }, { "name": "model.layers.56.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 454 }, { "name": "model.layers.57.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 456 }, { "name": "model.layers.57.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 458 }, { "name": "model.layers.57.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 460 }, { "name": "model.layers.57.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 462 }, { "name": "model.layers.58.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 464 }, { "name": "model.layers.58.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 466 }, { "name": "model.layers.58.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 468 }, { "name": "model.layers.58.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 470 }, { "name": "model.layers.59.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 472 }, { "name": "model.layers.59.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 474 }, { "name": "model.layers.59.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 476 }, { "name": "model.layers.59.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 478 }, { "name": "model.layers.60.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 480 }, { "name": "model.layers.60.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 482 }, { "name": "model.layers.60.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 484 }, { "name": "model.layers.60.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 486 }, { "name": "model.layers.61.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 488 }, { "name": "model.layers.61.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 490 }, { "name": "model.layers.61.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 492 }, { "name": "model.layers.61.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 494 }, { "name": "model.layers.62.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 496 }, { "name": "model.layers.62.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 498 }, { "name": "model.layers.62.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 500 }, { "name": "model.layers.62.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 502 }, { "name": "model.layers.63.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 504 }, { "name": "model.layers.63.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 506 }, { "name": "model.layers.63.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 508 }, { "name": "model.layers.63.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 510 }, { "name": "model.layers.64.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 512 }, { "name": "model.layers.64.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 514 }, { "name": "model.layers.64.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 516 }, { "name": "model.layers.64.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 518 }, { "name": "model.layers.65.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 520 }, { "name": "model.layers.65.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 522 }, { "name": "model.layers.65.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 524 }, { "name": "model.layers.65.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 526 }, { "name": "model.layers.66.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 528 }, { "name": "model.layers.66.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 530 }, { "name": "model.layers.66.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 532 }, { "name": "model.layers.66.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 534 }, { "name": "model.layers.67.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 536 }, { "name": "model.layers.67.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 538 }, { "name": "model.layers.67.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 540 }, { "name": "model.layers.67.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 542 }, { "name": "model.layers.68.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 544 }, { "name": "model.layers.68.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 546 }, { "name": "model.layers.68.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 548 }, { "name": "model.layers.68.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 550 }, { "name": "model.layers.69.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 552 }, { "name": "model.layers.69.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 554 }, { "name": "model.layers.69.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 556 }, { "name": "model.layers.69.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 558 }, { "name": "model.layers.70.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 560 }, { "name": "model.layers.70.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 562 }, { "name": "model.layers.70.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 564 }, { "name": "model.layers.70.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 566 }, { "name": "model.layers.71.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 568 }, { "name": "model.layers.71.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 570 }, { "name": "model.layers.71.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 572 }, { "name": "model.layers.71.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 574 }, { "name": "model.layers.72.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 576 }, { "name": "model.layers.72.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 578 }, { "name": "model.layers.72.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 580 }, { "name": "model.layers.72.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 582 }, { "name": "model.layers.73.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 584 }, { "name": "model.layers.73.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 586 }, { "name": "model.layers.73.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 588 }, { "name": "model.layers.73.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 590 }, { "name": "model.layers.74.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 592 }, { "name": "model.layers.74.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 594 }, { "name": "model.layers.74.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 596 }, { "name": "model.layers.74.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 598 }, { "name": "model.layers.75.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 600 }, { "name": "model.layers.75.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 602 }, { "name": "model.layers.75.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 604 }, { "name": "model.layers.75.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 606 }, { "name": "model.layers.76.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 608 }, { "name": "model.layers.76.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 610 }, { "name": "model.layers.76.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 612 }, { "name": "model.layers.76.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 614 }, { "name": "model.layers.77.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 616 }, { "name": "model.layers.77.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 618 }, { "name": "model.layers.77.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 620 }, { "name": "model.layers.77.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 622 }, { "name": "model.layers.78.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 624 }, { "name": "model.layers.78.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 626 }, { "name": "model.layers.78.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 628 }, { "name": "model.layers.78.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 630 }, { "name": "model.layers.79.self_attn.qkv_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 632 }, { "name": "model.layers.79.self_attn.o_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 634 }, { "name": "model.layers.79.mlp.gate_up_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 636 }, { "name": "model.layers.79.mlp.down_proj.q_calibration_scale", "shape": [ 1 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2, "byteOffset": 638 } ], "md5sum": "34e32dfc0814e3592da944303c6bc891" } ] }