diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,6849 @@ +{ + "metadata": { + "ParamSize": 451, + "ParamBytes": 65527752704.0, + "BitsPerParam": 12.329999342718688 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "8bf83241e43ac68721913d2dfa887427" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3a2cd1521a89755d087c7ba2fa591a22" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "95b9a88dcea26230448ec154fd309afb" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4fd13fa67b9d5f79cc6485ed3d28979a" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c1c5ffbdad9f682c66f72cab3b2f9402" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.63.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "8ec877c0d41243d26b338f0562d4abb6" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dbbe3ea33a7aa6d151f8bc502ad9b64f" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 1557135360, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 152064, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1557135360, + "byteOffset": 0 + } + ], + "md5sum": "cc0c1ba518eaa3a45514e23abe4299c8" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "750a88fb02cce3c9f9919e42baba12a6" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "da377e8667831dd98d0d0c22547bd75d" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.0.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5bdf20548218be77a7cc28ded0da6a22" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "626a8765ebdfb72e7d3324dada211c73" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "38b458ac7408ee93886b6b2d082c4b40" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "26e1f1547c884f3d6170c7521d568d24" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ecb48889a46e0cd9705965dea8ca8b78" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "733627a24c80d698dea227bed0290726" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a380a25fddf0adeee2453f41b1dc3984" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "bc8f376d52c6a428062337492099bdfa" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6a1658a6cd3a731449eb5a906cc7fc5a" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "340f1c8e1a808db4cc39c006d912f217" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.10.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "94a8ed1f476a15cd98dcf7f4a772e2e5" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0e687e6b0f756040ce7243a415e612cf" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "2a01718352968cdf4a68a9678c972aef" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "cac3fe21d05f4aa6aaf787987be8689e" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "1857b413f6bd10e9888a6026d8f1809b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "15f10db579cac98acf81a3a2180726f2" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c0ca940488253a2b8fa0d083623556a6" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "d4cbfcc5f41e2adc544406cb0ce4ab5e" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3f6a60bec1f40d4c7244c28ddeabd043" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7cff4b6593c52592af05e39cfba9ab82" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.8.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "94cb0a9a50dd7b458bbbea475ea99343" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ca164b21bb804f2bb6d49875e99286a0" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b7a5d09f3e0a9ce96e4a0435ce7df127" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "8145db65e2d1d6dad0a26f5d89b68bf4" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.9.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f3f5fdf5b130a7496a2aa2f2ed95ca93" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "74bf77a8b0b6bb746263bec57a06544f" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b2a43d7b3a15e059fb78bc3ce78d1c57" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "38c28e49c3ab8e7c4796ca9d9f469166" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "2f324f045f7aa9d5ee601fc62aeed7e0" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "44191dad41262303d0b6e9d6553b35f2" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "660f71a6cb7b671ca561ae031780d3b5" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "8f0b35a54f22264278ceeabc5cc149f2" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a8ae995a159da72c7a5126b5a73676ea" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "70e63f491cc05a5c088be46e30d7a237" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "43116a9325c85d861ee524cc54c3285d" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "be602943c36425031049441fdf367ce6" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "fefc52a122bb90b9e4160d4f21083282" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "44c326edafee467efbc3bcc081d152ea" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "cf327a42bf8ad722518ec1f5065bc144" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "815db9399848debf0289298735fd1e9a" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3183e28451248be86a0503d29cac0ba8" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0668b933d0df82b676db0207a6f0dfe8" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "72070d68b4457530588912b42136553e" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "54e25f7d5622098ec4816ccba2fa8a40" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "101194e3f29a6fa7fca3392ae71a6e99" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "d020377a2d58c9a5a88b0bb9d3d6725d" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7d9305def91c159483fc90a50d8761e6" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "77312040fe72c0e759162f4da92d8b4e" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "69c033460bed6b238d95167a8a373a11" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "df321c1badefdf81f1ae8e021af33a6f" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7d1dc778a6d0a1391dba239a3422d0fc" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "4e6914b150b6ede90e7d573f97d3fa88" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "14d32a094cb85b8d21d62eed56e9bc9f" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a5b2444cd06bc8370c1b2510955eb858" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "7631a2493f512c0866b1894af0f3f959" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "9a8d1a5b0ae17c691a3f47bd6dd577af" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "515b6d77a5a19fe03a83aaf496c9c837" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0170e0410ea94109e16b9a91fee0003d" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0fa558ffd19835d3a02133f782b9768e" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "efb9eef8ae6e3fc857f865f95314c503" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "fe19ba3901cb7af7ec8a07e2940338b2" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.19.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4af2bc50b8cd4b5ba6f1e1df25eeb84d" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "480663ab7ea11557bdb28e9052df7f69" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "90b584b5015ee5243d5c352944695163" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "48f3874237b80f428f1195b500ee5435" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "b71f052deed39570d4139917a1d698a2" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e05bcc9e16b866eef42aa09105f3d271" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a1dc616584480f814cbcc5da0b56224c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e305c78d267a1e20e2a2fa849f67c56b" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "9031feff0ea7fdb40cb727ba244f34bb" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2ad41401f287b6a24ac57daed89fba87" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4abe4f498c949dead7cdef9a9e87a22f" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "fc8c3e78b21d686dc7d939a2d08105e2" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c9c6e287473a7851b8e7684c2a115024" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "abcab40cbc31e0ed742745421e4b65b4" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "938d107f0afd73606bffb7f00f44ad25" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d788d3d8ae166f0a619c88a3711a924c" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "62e1244d26b917c3c4d0ca004cba8e35" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "75c653de3e52fc99520ce3e29905f80e" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "d51ea58285691f611d78cd02ce26c0dd" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "ebd9332447c57517cfeac7593a958743" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.4.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "0bfab887063b835b606c70898032b220" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6c210f559d703f4592ee1b437c292749" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0b32060a374a6fd5cb27c4f0a1948ac2" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "bb0e294734767c2cd14b75b3cdefd5e2" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "3b4085746957cdaaadc0f9fcd0c8d815" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "a0e56966dae73cc347e3439fc7bd956b" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "878d62b08e4679074bd287d54132ce6f" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6208900eca0ad97d0f973ca6b73a9467" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7a10c872f2e245573a11f489328cb86a" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "617800a9792578ea58626cc97e007069" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "4edef1f89fae9c9a3a0fb4cf7d0ba9fa" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "4502cdd09003027c2f76d3bf754e9253" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "caf2c302f6c233b1d877ba3dac853b8c" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "aae1d1063e0f26fc31bcbc61ed964c7a" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "091048af5fc512f2a8b0cd5eddd330a5" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.24.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "92040f85317411dd7792387e734b0c87" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "35e7b9a0f2306d1eaa1690abb064936e" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "7f10c420b1401401082906bef611e27e" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "0e3e3499feab3e009c3c40551e507752" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "efe4cc48e7a69e52f9de75d80edd26d8" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "0e7d7675d9750fe2f1fb07b940c31c0d" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c4537ef2899fef8356eb40619f9b5a51" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e3e98fa2cb8fcaffe09b196353963bb0" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8c5cd2d2c7b6117b8724b108578dcd94" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "76c9392eecacd28d1c8a960859112f54" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "43dbfe44305d0a296bb6da74255f597b" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "104915a0f842b7a0419101832ad3cbc7" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "1f5a7b6d42cb544feea0db40fc284fc2" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b823fdd97ad9ef4996dd8ff96ab05cc5" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "8d643b15b5a2d4eba07b550da3ac3e5d" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "10eeae436effdf1384e3907950f9357d" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "36178a237622b0bcb58b75b770750df0" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "e6e6df278a289f7b566641083311d4b5" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f2ccdc26383173b03130d3ef526d708d" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "810b35df6bbad5fbdabc72e279cf821b" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.29.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "058cf853f014928a58df7997d9c4de5c" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "a511497ea7ad26022566ac65066068ce" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "9b147b97641c70ff6481b91e5c09a84f" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "cb91a194a947817eeac636075c36780e" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5139e8c5c43b17ad3b205351fbdf9acc" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "945275a89392ae8586b07ab6ef243057" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "a2cc19e92cfa9a3461b468c9b5c43fd6" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "928e2983a51a2b9448d25f9bf372acc5" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c314569027504f03f1f1bc7dfe2d9649" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "21973fbd3d55a15d0bec2c22187f89a8" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f4a2f3ac11b800e1a785306d3d6e4c09" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "65701d1def8b234d7c5b70f9604d8a9d" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "bc8496f3e09dd875463a44893a6a0f12" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "aa0465b4e7d7e66d88da7c8d175a436b" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "019c54b71de6936d32c4c7630fa1c682" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "7405fef66e60745665619fdd88e394ff" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "76a6c24b88c4721f214e1ce572d22196" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.33.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1f800ecc0901116d0614f9785e8be745" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d60681f257cac7ab4c119e31923e7d4e" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "605af38d1de46b0e6f3b7601cb9ce273" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "90ae0a995e3761eb8084fecf4fe9f8a6" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "1e4b20b8cf5281b2cec7470327559956" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0628853f1b0f249d0c99ea245bf1b02d" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c2a9f22338360c1b8d3daf50934e7c31" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7a5dc9d5cfdc781b15f29bfc9ba24ba9" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2fbfc985aa3ba0f23965098e997caaf4" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "55d190e07fb37f0fa37af4d3b8e9e921" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "785074fa2659756360cc9b31d9325977" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "e3c64009020284a002ae4aaf50494750" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "055fa478d2dbf81cc3ced6a47ab20c73" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "effd16464de2908676f2b5483675ad65" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "aaebd803cf478d8ffb0cfab4964b5abe" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5717c4b8e048b2676dc2d242dedc5aad" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "989fe41fdaea4b41c01c38dc3dd42c42" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "3f7faa7612d04b7a3c99f872fbee21d5" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "5b230f18df49601ec948e8ce38aac649" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.38.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f90398d1e2b93007491236caccdc8b08" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "09995791efe429f4c2fc696c1a7c208a" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "ff13316a1e0a82466495b8e0c38f7e89" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "6b0658d94bab5354af835b29a7be59e0" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.39.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a0ec51171b6adfb34fe32052c82cf752" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "79d037f6768d3bd1cf8ffff43728f2f5" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b163b3d7814b2d6ba81cd73808c6fdb8" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "1479e18636d33aca550146d789590365" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a081296116b4703ee22f6704d8a22b94" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "94ac620c9b351d28e790039f9da6bac5" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "fedd0c494d4f7923501fde38d321f7b4" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "070e4bffdd4cf4030b3e4c74d898439f" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ecf2e3620b5ca6f901fc3c0e68f50353" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "2929dc1e91c7454abc36a9bded55eb83" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "71ba5e7f388b629d3b63b0ec83725440" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "6e2da3a673f30ecc13ec8063703a4121" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "17d1f4ed8b26ebbcd67c84b49219b548" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "40eedf2a31713e0970e7aa65287dbbec" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c4e991a2f65757fd6b0c45780167aa8b" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "6d9140953cd3ac0f934008101206011b" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "281d865d1179e91c1ca06dd1b4ca2894" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.43.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "b21eff725f9513a5ccc6a70487d5ac4c" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "57b0baea5ce26a48354835856787b898" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "57195cba5383dd4bcc53d1e0014ffeb0" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.44.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "46653a6dc93a1c325482f6db7fce7e0b" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "5412751ecf265e2439fd6d66ba460a91" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "0d4bb0c1d5ccaba2783fac54270249b8" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "5eb1a2ccccd164c777e06d4e1e037dd6" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "31616fd48f0632d4fe4095502ffede4c" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "a37784bbbf4a722e567300699f41ae8e" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "90d812751ed32508df3da71cb7de7fda" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "8094a6ab919904ef2857deeb8798a40f" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "f834fddd452f55df81695c3c16b8d4b3" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "c12f184b20d60374af2a92d3deb7a23e" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e1891734ca2d5386f30fe717427ffed7" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "68d37336d31bd5ea515f9ce58a5887ce" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "0896fcd5530ca9510d41abec3e916da0" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c2ce6f93f4f9cac65447f5e433ce18d7" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "8febb38777fbfb56a590e968030fc3ff" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e0358ef4d4132b46ab62a064f22385e2" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "2b2a9c34bc715c546a37fbec3d293362" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.48.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "76d438f416ef5e64fb7888952fffceda" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "5974263bffa5d9591e2fd27f1d80721f" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e81222993066d9194285e327de8d557f" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.49.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "f64b70baee4f1242b1c10f8789722096" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "fbaf9f6e807fe5e57f223adc582c5e1e" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "b6638d52b616d1eddfcc65a5a413f8c2" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "aa456c76d66b524369c51e475bb66bf8" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "dbcf0da5abc3287a42b2049d6392e1f0" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e068db487ec2561d789b907c6a32c940" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "e1bc132bc1cfb27d0663c172b60587ee" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "a468e0d867546666c63b47c308f67334" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "19ca5fc8aa7b8aca37062cbaef76b638" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "8ae104e8420e8cab019012183da340ee" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "2b26e2aba7070769ea6ae1e85d2f4d21" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "c1e3499e9551fd87b9e6a4d26d8acdea" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "90159beff9d4d61950e4746e98dcfca0" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3d61f51306df5ef973d6b1497834099e" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "19392c3121dee5cb8f41bfe993977ed8" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "8fa22319dfd5b456ed6b70e7c723c49a" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "35146444828dd5ec1c9eb71489d8f489" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.53.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "aead69344acdf746bc2f7d97a4f42d18" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "21d8863aeca009899dc1f0a9bfe2696c" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.54.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "a50faf95a3cdd5e0535918ef468071d5" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "1f27e7c3c3ea66f70e706308c6610196" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "b94d091f52353c5873918d31454cb4c3" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "147d7063643eb98cde306ff59142f107" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "d2ba937faa11ec52a033c52418d58475" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c6915cae05677c408181d31c1bd1e622" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "69975ec8dbf4b363dc3b196afe4ae688" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "f11a350ff95430c19f6bb366e8b54122" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "e58c72e8558392d36d5f2e4394204dcf" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7ae6dd8e728d0b008347828280d79d80" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "3e9cee798bb637f1501c436ba4d483d9" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "010357438083741ead9839651f725bd4" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "f86d31fdec9d8f0476080099c8dda8ab" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "ce3067d42cf6bef83eaa23fa7d4f7f11" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "c5139bde0699b35bb588c8bad734c818" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "db9c49a2d2ec3b9dc6eb4351ab99d3b0" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "7aa97c791d016675d96efd04306068b0" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.58.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "6b39a5a8f48d67ef306a70410ee26775" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "3cbda3a8674f794875435ad2050a95f3" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "9618378a815711e3d01f54bfad5d4322" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "66170b368faf02df9019bcae03a32127" + }, + { + "dataPath": "params_shard_246.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.59.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "c79efcbb69d9e5dd9d5843e0192e49b2" + }, + { + "dataPath": "params_shard_247.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "284805d3144bffc50c487ef361fce9fa" + }, + { + "dataPath": "params_shard_248.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "df23920619282523a0b183658e53fc4c" + }, + { + "dataPath": "params_shard_249.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "1affbc29eb99c6e5b57bc4a18d09bbb4" + }, + { + "dataPath": "params_shard_250.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "82be8662cd70bf3f22a6e4ebe001a1b5" + }, + { + "dataPath": "params_shard_251.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "213bed5c3c1c75e5065104a3249ffd79" + }, + { + "dataPath": "params_shard_252.bin", + "format": "raw-shard", + "nbytes": 283115520, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.weight", + "shape": [ + 5120, + 27648 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 283115520, + "byteOffset": 0 + } + ], + "md5sum": "6c9d7e2ee8b3fcc633559a1c71160802" + }, + { + "dataPath": "params_shard_253.bin", + "format": "raw-shard", + "nbytes": 566231040, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.weight", + "shape": [ + 55296, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 566231040, + "byteOffset": 0 + } + ], + "md5sum": "40ca240fbd828f636fe6361887f4f3a9" + }, + { + "dataPath": "params_shard_254.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "356b79c9417873073adb840aa44c8b25" + }, + { + "dataPath": "params_shard_255.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "9b86e7ddd2bf311993557233996e7a0e" + }, + { + "dataPath": "params_shard_256.bin", + "format": "raw-shard", + "nbytes": 73400320, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.weight", + "shape": [ + 7168, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 73400320, + "byteOffset": 0 + } + ], + "md5sum": "38d0b7fc31df42bfa81ca0af57b13e7b" + }, + { + "dataPath": "params_shard_257.bin", + "format": "raw-shard", + "nbytes": 52428800, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.weight", + "shape": [ + 5120, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 52428800, + "byteOffset": 0 + } + ], + "md5sum": "ee566b569ecac8bfe4ef1aa2ffc00857" + }, + { + "dataPath": "params_shard_258.bin", + "format": "raw-shard", + "nbytes": 2238464, + "records": [ + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 0 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 10240 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20480 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 34816 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 45056 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 55296 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 65536 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 79872 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 90112 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 100352 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 114688 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 124928 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 135168 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 149504 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 159744 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 169984 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 184320 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 194560 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 204800 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 219136 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 229376 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 239616 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 253952 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 264192 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 274432 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 288768 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 299008 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 309248 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 323584 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 333824 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 344064 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 358400 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 368640 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 378880 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 393216 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 403456 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 413696 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 428032 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 438272 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 448512 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 462848 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 473088 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 483328 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 497664 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 507904 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 518144 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 532480 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 542720 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 552960 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 567296 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 577536 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 587776 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 602112 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 612352 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 622592 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 636928 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 647168 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 657408 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 671744 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 681984 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 692224 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 706560 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 716800 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 727040 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 741376 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 751616 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 761856 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 776192 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 786432 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 796672 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 811008 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 821248 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 831488 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 845824 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 856064 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 866304 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 880640 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 890880 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 901120 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 915456 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 925696 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 935936 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 950272 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 960512 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 970752 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 985088 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 995328 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1005568 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1019904 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1030144 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1040384 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1054720 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1064960 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1075200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1089536 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1099776 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1110016 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1124352 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1134592 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1144832 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1159168 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1169408 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1179648 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1193984 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1204224 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1214464 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1228800 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1239040 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1249280 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1263616 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1273856 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1284096 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1298432 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1308672 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1318912 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1333248 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1343488 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1353728 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1368064 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1378304 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1388544 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1402880 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1413120 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1423360 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1437696 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1447936 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1458176 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1472512 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1482752 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1492992 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1507328 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1517568 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1527808 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1542144 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1552384 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1562624 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1576960 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1587200 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1597440 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1611776 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1622016 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1632256 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1646592 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1656832 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1667072 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1681408 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1691648 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1701888 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1716224 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1726464 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1736704 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1751040 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1761280 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1771520 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1785856 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1796096 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1806336 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1820672 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1830912 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1841152 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1855488 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1865728 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1875968 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1890304 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1900544 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1910784 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1925120 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1935360 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1945600 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1959936 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1970176 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 1980416 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 1994752 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2004992 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2015232 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2029568 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2039808 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2050048 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2064384 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2074624 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2084864 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2099200 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2109440 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2119680 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2134016 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2144256 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2154496 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2168832 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2179072 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2189312 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2203648 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2213888 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 2224128 + } + ], + "md5sum": "a3d9affbabb167395433a90aaa743ac2" + } + ] +} \ No newline at end of file