diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,3137 @@ +{ + "metadata": { + "ParamSize": 195, + "ParamBytes": 16060522496.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 1050673152, + "records": [ + { + "name": "lm_head.weight", + "shape": [ + 128256, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1050673152, + "byteOffset": 0 + } + ], + "md5sum": "ab044d64a21c0b50372b39087584bd28" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e83f1e2cc19089959c456b650f3198d2" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 1050673152, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 128256, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1050673152, + "byteOffset": 0 + } + ], + "md5sum": "9173e0a64e7a2afbf7246af0d61d0f2a" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "074b67ffa12db1f84b189b34b9f01706" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "130de9314f97a23dd50f391b2202f49c" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "273d15fa6b10b27f056ba451a292d63c" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "74290290ded4fab3209aa7323f851e44" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "6473a4ef236986c12915d4e5ae983b0d" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e511945ca4e31a37fd5e420a49267683" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f98f42da50a5f87f2f01f00b8c94c141" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f19dbaa0cfbdada8479b2682337c567" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1a7d2e40aff4908a75b9eda89d4b62ff" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a7d327377b578073545d51e0d01c99a3" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "7afdd4b730ee23c49cb32b0beff83208" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "38d47f9c02e6a7b414779287464c73e7" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "1238b1933fee5007cca7c0adeff08e89" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7dc2985efd9334cd258e56f5424942ef" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a1c6847ff24d43edef06cb3401e8346b" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "5f50a02405c28c0a5d3d6627cfbf2ce8" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b64e456f270c948f8467d182ab71061b" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "ed8bd7fa1f3c5d360685c1560754f3f4" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "836d5bebaf525b7535d08b8f551f49c3" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9a736b49ceb7d7750e073c10942d86a5" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "bf8878e4abac3fcc1053133561d621eb" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4e45d5a5990d85e804ec47fd4c1de62b" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c249ea2d99db8e31414b679a75b309e7" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "069b27afe17a1a3b898dae0328ad4ca2" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "79c6f7d8f386e769030d53ac05293932" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cf4f65e16b33af859268b3662a325fab" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8d86764287cebb9a9e3baa5bb73a8940" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1b0865540e183e98aa760575b63bdf88" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "0cda178ca4e22a56385ac0549ef45757" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "e5516292dd3ef234329f688b75ecd38b" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5f0369c9c3ae61a56eb74ca6b4b3d8f6" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fe3ccbfda9d40805e3a3087a3aca98b0" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "32f69edbed11bf4a097234262c5a6cee" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fd349d55b440082f71a885698869fd0e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "2866aee7238b305d1d42e41c1f18b9e5" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d663d2dc7e1f91d1203d17d005093898" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "925c2ab44db524ac8a59b3363bda2c22" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "1a8fa4fd2a4f3d5b9837ef6efc6ce151" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4e6807ae0e9097d6ab0fdfaf89eb39f3" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "323bb70d863cb2c144a466e53cbe03ce" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c7f321fb722164f56e34c5b43b2c4ea6" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b3b5c0558a96f7517932b37e3e0f6911" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "fec6470984c0b3280a05af5e76059e26" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "abf3a9ea6d9f045b9c2c5e185c6474c5" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "233ba942f895e08928843a79f64417b6" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "8a01562bade836a614091d72ca4ce5c0" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "878d80c7675f15369704337c94ec10c8" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "0343a1994ccaf3cf0a923061da2e2126" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "cbdaac3fa8a5e7a9778ee7ee59e25988" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "82a909c10067de8c593c24d4dbfe51db" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "4d9b157a5bedfa17fe03c5961928b010" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "40b6be3846b8c34811c7efebe828d743" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "27f032d257c560ad1db0ad1718c0713e" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "30a98b08c20e73ab141879f2dd2a0c38" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "760747b13020079ff659555e4febd337" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "65d1dde6c76d9f529762854aa3623e81" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "f3cc031a15ce0ef0e0076957ae2e71ea" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f73685842211aab8399e314554b9c0b9" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "9c6f55b644e23cd9bf834db37eeaa27a" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "003166d50193c9d3c2c7c88990b6643d" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3196db7d3c4bfd84419c6186a094fcc1" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "a2f51f69d620156b458ac8152358bf26" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "0f24d454dffc08bb510649f160e2250c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1844e2eacfbda5edf23c6d6d3cefadea" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "003ff48334f68ead99eecb619a54776b" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "94d130b9f23bd832aeb921f1001b6d26" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e6a47a48c543b71deb13c83792533b17" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "03917b40df2b4deb6ac3aae9d6102deb" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "d02130184bf405140af14435bc1e9a23" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "d8c017c803f5c847b44bfba581fd3803" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "de1e850cfbe3158996546bc21fbd603b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "fb81401942ac83b8c649f7df1f102253" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "a003022bd587b79322bf2bc56f940c05" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "cf8b74128e12061958317edb57df8b4f" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c9f660956b4eaad4cf7b2cb96650ca0c" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b8580f8d001049726c59d42cdb71828e" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "f233cdabc9ee65792ebd6eac9d1d9874" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "b3e5717dd467c149539e38c66c5bc5b4" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "85c9f9d72884046c481786edbc0587e5" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c6c330b693e650e5ab0e1c561324f6c9" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "331912e06cff408b4d0425f7a3e97d4c" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "f386fefd4c982110922135655652d991" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ac19ed8bf3a33c98459116a83c7dd63c" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "b8a89f9821dba551444ae7d18b85fafd" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "2641b19a83e4d29d3f346653357f39c0" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "c358f9fdd406d0e40892546a565e3a8f" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "8422e2ed0a8b66d76ee6df505748a6fe" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "46f693e10b6ff3b90d2e3c0968fe2087" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e33ff9e6be0654fa0f7e8c2444bc4295" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "b8e3e09f36f6a8208734eeb0f79d2271" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "52420bd814bae89e248cc5e1955a29b2" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "cdbdcd48c5b8374c2c2442c6c7270a2b" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "9d188b568c15b2a12dcbcceef31f11f5" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "19e1521d09db01626a3782eae391afe3" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3dd1949840ffe0f40b054c8cfa0f957f" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "537faab6244365e91672d818f15e2c36" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "3d73e1a0cd759fd5203ad309e1e387ad" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "43e4de1dae3d005cfff0dc4fde9375b8" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "e5c539b8856e7f495aa1a7ffd5b85431" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "9779dc4a784e935250107c6ab5d46741" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "e7d12480df8ccd276dc9c50c3b9231c8" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fda30d80d4f98df483fd613932a3aa4a" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "27c52e2b46b89e5452f2ea82ed1d9227" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "88a047325b789bf72df5c46f9fb880e9" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "7ff6651b7722d3d7afc37f8269e15661" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "4ee71a7f1cc42eac5e0746e37da687f3" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "c679b63c5b85d3fa15c2fcbd49fbc48a" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "bb01183b273e4ac7626e02e9c15d50f3" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "28ba9167c2ae77bae8ffbc798baf4d9e" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "fb236297c897d1cd61904686d64ad57d" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "5006bb6c3349112342a2d2251b933664" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "1206de1cc36f390118834aea286669d4" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "48b211965442f9ae7d6160b63dcbf305" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "bfca698ecb180ae5d6ed4926f6cf96a5" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "53683c48f4ba912d5385478cd0fd171f" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "8ef7f64fe2b708ffbb74d8a9d21e67f3" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "c8ade2308ced3a863055b4e93604935c" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "048ea17d590d4d44983d33399916f192" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "3b528fdc45d7119609d89dad205e005f" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a3e45c58a78945cad031d419255f90c4" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 117440512, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.weight", + "shape": [ + 4096, + 14336 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 117440512, + "byteOffset": 0 + } + ], + "md5sum": "df06e6e191ec47c124fbc4fba84f4436" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "7941bafbff13f9a337e5132077d80067" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "963452db05d3fa0f9e790b27c36ce354" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f606499d17e01e1eac81c86949643504" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 234881024, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.weight", + "shape": [ + 28672, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 234881024, + "byteOffset": 0 + } + ], + "md5sum": "9bd4db193283d565489b80c4bc364b6b" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 50331648, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 50331648, + "byteOffset": 0 + } + ], + "md5sum": "a659a0664be1b2712080d21f91c8da17" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.weight", + "shape": [ + 4096, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6892044b3ede70fbb5ab24d2dbc1a05e" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 532480, + "records": [ + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 0 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16384 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24576 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32768 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 40960 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 49152 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 57344 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 65536 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 73728 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 81920 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 90112 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 98304 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 106496 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 114688 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 122880 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 131072 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 139264 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 147456 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 155648 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 163840 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 172032 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 180224 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 188416 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 196608 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 204800 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 212992 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 221184 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 229376 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 237568 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 245760 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 253952 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 262144 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 270336 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 278528 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 286720 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 294912 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 303104 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 311296 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 319488 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 327680 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 335872 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 344064 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 352256 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 360448 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 368640 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 376832 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 385024 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 393216 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 401408 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 409600 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 417792 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 425984 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 434176 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 442368 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 450560 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 458752 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 466944 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 475136 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 483328 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 491520 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 499712 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 507904 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 516096 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 524288 + } + ], + "md5sum": "60f39f4290c24e5f43ccb83308e13bc0" + } + ] +} \ No newline at end of file