diff --git a/config.json b/config.json index 088be3598d276a5092fc76270edd029a01a69175..cecf2ee2a9a9ac98dafeae355a48fe43b8bafabe 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "_name_or_path": "outputs/basemodel-llama3-70b", + "_name_or_path": "shisa-v1-llama3-70b", "architectures": [ "LlamaForCausalLM" ], @@ -21,7 +21,7 @@ "rope_scaling": null, "rope_theta": 500000.0, "tie_word_embeddings": false, - "torch_dtype": "float32", + "torch_dtype": "bfloat16", "transformers_version": "4.40.2", "use_cache": false, "vocab_size": 128256 diff --git a/model-00001-of-00030.safetensors b/model-00001-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e9245631ccdd8385100488ca957c0715d52c8e5 --- /dev/null +++ b/model-00001-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b20106aad8348a6e34bf6b81bfa568c201587c283bad01c2c70b3da5495cd098 +size 4584408808 diff --git a/model-00001-of-00062.safetensors b/model-00001-of-00062.safetensors deleted file mode 100644 index bc08e1b0102e55d0157d09db1d1c4bdd6f42fc44..0000000000000000000000000000000000000000 --- a/model-00001-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01cf66c97514aba3f46785e5e6062d8819ea97cae1d265bad3ecdee109ae4d2c -size 4806672984 diff --git a/model-00002-of-00030.safetensors b/model-00002-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1be0185c736a62d90192513b8023395b7a91f69 --- /dev/null +++ b/model-00002-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2ee00a0ea5bf261ae719829cab8891d8ed45f42063041017c12d27eb7dd0446 +size 4664167376 diff --git a/model-00002-of-00062.safetensors b/model-00002-of-00062.safetensors deleted file mode 100644 index b3357e47db2d56a65dfcb3b38316539a81787149..0000000000000000000000000000000000000000 --- a/model-00002-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b62ee649035e0b7388f1b24854a04f198640294a995f7a744da1caec8ec03b92 -size 4362142864 diff --git a/model-00003-of-00030.safetensors b/model-00003-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94ebc8f1513d3c0d1ab561a3ee3a5fcbb7a423ea --- /dev/null +++ b/model-00003-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da500c956c587325dc0d266670085e35e8923722b74ec5d490096e8a055bde7 +size 4999711704 diff --git a/model-00003-of-00062.safetensors b/model-00003-of-00062.safetensors deleted file mode 100644 index 9471ad77422f590e20102b4e526053ce5d0caf93..0000000000000000000000000000000000000000 --- a/model-00003-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c3d8eefde0ef6cae563c90ef1877800dcdb1bd1968d5b01d61ea659a85b58b70 -size 4362142864 diff --git a/model-00004-of-00030.safetensors b/model-00004-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6370f8cf5d09e910facf002124442469d5fd2cfb --- /dev/null +++ b/model-00004-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4394fe80f91a5cdad14ac0e15adf42f3a568daba122a6b46acd0dbc6e9d2abd +size 4966157032 diff --git a/model-00004-of-00062.safetensors b/model-00004-of-00062.safetensors deleted file mode 100644 index ab2c4aa6d73d2099695c5f243daefd30633beb7e..0000000000000000000000000000000000000000 --- a/model-00004-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:40fc9b683b95472af35e7ebbcf0dd6e224a444c5cf33d3c797f62405b1e2d65d -size 4966188864 diff --git a/model-00005-of-00030.safetensors b/model-00005-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52c9e403366fafa3e7f88102fc6e1433128f9b3c --- /dev/null +++ b/model-00005-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c574c88b7fcffea1b1a725fba0db3e39bac088aa9577e21173473af9d25a4e9 +size 4664134408 diff --git a/model-00005-of-00062.safetensors b/model-00005-of-00062.safetensors deleted file mode 100644 index 7b1755ba69870fa85c1b8a519d61f87eec33f8bd..0000000000000000000000000000000000000000 --- a/model-00005-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a408ab6709d975b33e0453beccde172b5b8460e9a2bd722fc514eea002254385 -size 4362142864 diff --git a/model-00006-of-00030.safetensors b/model-00006-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2346e17ea41a8831a24e58054c6a5b8b746156a --- /dev/null +++ b/model-00006-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acb926e3171e2a8ebfacf8b78489f41c49540cee9299db7de7c923d1547af7b1 +size 4664167408 diff --git a/model-00006-of-00062.safetensors b/model-00006-of-00062.safetensors deleted file mode 100644 index 9df8524dc93b347366b3fabb2c61acb5d4ea1615..0000000000000000000000000000000000000000 --- a/model-00006-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:91124ca99209c2b0a77923c84dfa2e9a6cfdfdf14bd1eaec121da55fbe961839 -size 4362142864 diff --git a/model-00007-of-00030.safetensors b/model-00007-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d0f35203bd0269751a0ab16883284c21ba52099 --- /dev/null +++ b/model-00007-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b5411e7649dd183fe34ad341e6e5bce66ab5454088d232be20843d3a11663d2 +size 4664167408 diff --git a/model-00007-of-00062.safetensors b/model-00007-of-00062.safetensors deleted file mode 100644 index 29c7a39fa5f1ad3e278378e1d756111f99cadb35..0000000000000000000000000000000000000000 --- a/model-00007-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9513b736b54849e9fbe34c0857ea54d50492761b393e8ba1a289a396353f686 -size 4966188864 diff --git a/model-00008-of-00030.safetensors b/model-00008-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e1db789a6c5b28225caeb2d29896ee5f36c456f0 --- /dev/null +++ b/model-00008-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eacbc9fc56b14be49968dca772bde2b1098a614cb2493ba71fd6e318d01be008 +size 4999711728 diff --git a/model-00008-of-00062.safetensors b/model-00008-of-00062.safetensors deleted file mode 100644 index 65b36b7e5d392eb063efa01fb212252d670679bc..0000000000000000000000000000000000000000 --- a/model-00008-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a1b5ee69fdb0aaa5326486137b9bea583fe587e2c9025e5e06b9ed10d7e282b -size 4362142864 diff --git a/model-00009-of-00030.safetensors b/model-00009-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ade32f78409514d0c7b870d6edb83a0371f107e --- /dev/null +++ b/model-00009-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e3bb0b7a891696be2c00b54aa6dc34b2db0482e35008943b4e6f415b17b10f +size 4966157056 diff --git a/model-00009-of-00062.safetensors b/model-00009-of-00062.safetensors deleted file mode 100644 index d8e12c05add7e04cba7b90179654210b52ce1a43..0000000000000000000000000000000000000000 --- a/model-00009-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3244ecd67bab8a038f100e8c0206f3fd5bb80a6375bda516bc16707be14d4545 -size 4362142880 diff --git a/model-00010-of-00030.safetensors b/model-00010-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67733336191483be45b408f4a17dc47728b486f0 --- /dev/null +++ b/model-00010-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63b402445b5f8aad2398613cd48e94f229617b7d43eac1457d059196c2d2fec1 +size 4664134408 diff --git a/model-00010-of-00062.safetensors b/model-00010-of-00062.safetensors deleted file mode 100644 index 01e5109aebf653dad4013ecd18b1c64f36c5e206..0000000000000000000000000000000000000000 --- a/model-00010-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a91efcfacee117004b9c2d8bdc272748eb2afe3d2719601303e329cd89fbd8e0 -size 4966188880 diff --git a/model-00011-of-00030.safetensors b/model-00011-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d516e4eba144c8e679adb085b407a555627c41e --- /dev/null +++ b/model-00011-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2aaa1ef82343d1663cf63648f85e285b59011a4ff6673818d854916c753adc3 +size 4664167408 diff --git a/model-00011-of-00062.safetensors b/model-00011-of-00062.safetensors deleted file mode 100644 index b0b5d53d389e841be2ef989d79d89f7b7f40683a..0000000000000000000000000000000000000000 --- a/model-00011-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d5ea4accf1e8731c88b4abcba638242a30dce0b883c27c7a925bafd05f482522 -size 4362142872 diff --git a/model-00012-of-00030.safetensors b/model-00012-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..868a2a15cd6a5dee5efecfefb546e08c1abc3d37 --- /dev/null +++ b/model-00012-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c255711407f5117eeaac7be56472458136f4d7c45997fa8b8fa92bdfd4ebd0a0 +size 4664167408 diff --git a/model-00012-of-00062.safetensors b/model-00012-of-00062.safetensors deleted file mode 100644 index 2fb7c59ddd876c2338fb09830f8ea688bffe2111..0000000000000000000000000000000000000000 --- a/model-00012-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:50b9fda66dc46f4717abd0ab30dd5b64b2cca3b3528ec0f199a144ecd1408de2 -size 4362142872 diff --git a/model-00013-of-00030.safetensors b/model-00013-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..35ee8a1d39ff5aec74db1cfb38c7c157e1a83179 --- /dev/null +++ b/model-00013-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd4c0a174c1aaa800046c71720e808004a8156e0891093a6bf098939db405fe0 +size 4999711728 diff --git a/model-00013-of-00062.safetensors b/model-00013-of-00062.safetensors deleted file mode 100644 index 16838c60e24fb3f311bd1872eab5ae7818e077cc..0000000000000000000000000000000000000000 --- a/model-00013-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:90cb95544841ed18f9d05df1115bb2c7fd12508a948dceadf8b71bfa0d5041d4 -size 4966188880 diff --git a/model-00014-of-00030.safetensors b/model-00014-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..933c8bfba14fb47ccef7e7d7b420071081826ec5 --- /dev/null +++ b/model-00014-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8370523d7031a6778c7e7fcdf6986ebcdb96f230a0a8779a7468dd7c0aa0ae97 +size 4966157056 diff --git a/model-00014-of-00062.safetensors b/model-00014-of-00062.safetensors deleted file mode 100644 index 9654fc498a78a972272cf6628821c876d6389fab..0000000000000000000000000000000000000000 --- a/model-00014-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7bce48749a63bb03fbdbefb45fc4eb2628293bb9b45de5a624250b17df346df2 -size 4362142872 diff --git a/model-00015-of-00030.safetensors b/model-00015-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..517a4dcde95e194e160bf986a1b49b0df39e567a --- /dev/null +++ b/model-00015-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3334f9991f3ff8e684bfab002e0f8febdcb9a65d831c05757227b66120af026d +size 4664134408 diff --git a/model-00015-of-00062.safetensors b/model-00015-of-00062.safetensors deleted file mode 100644 index a7179be1cab7b90e4f2a82c2723fae3e338c7720..0000000000000000000000000000000000000000 --- a/model-00015-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:85f336aae14b3b6c8f07c04bd82d2ddae050fef4c36f7d42e5f52acbc28e0940 -size 4362142872 diff --git a/model-00016-of-00030.safetensors b/model-00016-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4909841d816e792256a2df5c1e04c5580cad7ac2 --- /dev/null +++ b/model-00016-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8392fdca7090e4d83a71d7698ee649c0fee511fa987aff9521807a6c724ddd +size 4664167408 diff --git a/model-00016-of-00062.safetensors b/model-00016-of-00062.safetensors deleted file mode 100644 index 5eabb7e5eceaf54553f2342b34eb208a220082ad..0000000000000000000000000000000000000000 --- a/model-00016-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f31e7623f9dfe1c79555c5d9ff1fbcda784e489a657ac4d83bd89ec28e5d49a5 -size 4966188880 diff --git a/model-00017-of-00030.safetensors b/model-00017-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddced287ea0efa626cd29b08bbb4901915943147 --- /dev/null +++ b/model-00017-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e15347c57fde67269dc066bcfb2bb4d432b4b80303a1b8420872e20f60bf38 +size 4664167408 diff --git a/model-00017-of-00062.safetensors b/model-00017-of-00062.safetensors deleted file mode 100644 index 068804ab46192dea655ce2a4e39c8c9551a04bd6..0000000000000000000000000000000000000000 --- a/model-00017-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b1fbcf948bf401eb37b6091d3c67bc9973a4dc6d81bc70005f45061d1e6ddf0 -size 4362142872 diff --git a/model-00018-of-00030.safetensors b/model-00018-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..33c28c261f2689976ea0a2ded513b3280aca213c --- /dev/null +++ b/model-00018-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce6fe2a8304e7d42b138c0f26d3538c8a120dc676a6cdef13fcf53038669958c +size 4999711728 diff --git a/model-00018-of-00062.safetensors b/model-00018-of-00062.safetensors deleted file mode 100644 index cefa2bf3d6e87ba168ec0aab9771dabaf577c5ae..0000000000000000000000000000000000000000 --- a/model-00018-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0afb4882107bf0e422a4e7f79b1f254d12a1fb3e3eb80fda0641d9ce005ed8a1 -size 4362142872 diff --git a/model-00019-of-00030.safetensors b/model-00019-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a6161d35909673dc6b3df9877c55b690a827171 --- /dev/null +++ b/model-00019-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:257460fc0e900e6a7d947971dc8c2b963c7c12529272715fcf615e0c97c8d4f6 +size 4966157056 diff --git a/model-00019-of-00062.safetensors b/model-00019-of-00062.safetensors deleted file mode 100644 index 6c9a126fa037892d4971f367c30ca9354828f523..0000000000000000000000000000000000000000 --- a/model-00019-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:271feaf7fe5b8aaf772c5e376db460c0fdd6578aad57e36b1f6180481ba927b6 -size 4966188880 diff --git a/model-00020-of-00030.safetensors b/model-00020-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f976c63aa09d5d8a94a13befb0b684f1b687406f --- /dev/null +++ b/model-00020-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2b65b4f95c6948563acc67f8c42776d2a580c3ecfbf385fecf8d6de1c4b2da +size 4664134408 diff --git a/model-00020-of-00062.safetensors b/model-00020-of-00062.safetensors deleted file mode 100644 index 30f95953df577a3abb7f2639cff07c0a2204c284..0000000000000000000000000000000000000000 --- a/model-00020-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:471818ad2685bef5b119973c47aef9e28460a5200185e0a00b18b3b7dbf10c14 -size 4362142872 diff --git a/model-00021-of-00030.safetensors b/model-00021-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..22515346d7d365a3294dac594c072a3b803cb8f5 --- /dev/null +++ b/model-00021-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:787b8b9b6f645685142bed96b203d011e50c99de3bfbbedf9ae01690e2baa4be +size 4664167408 diff --git a/model-00021-of-00062.safetensors b/model-00021-of-00062.safetensors deleted file mode 100644 index c85df0f811ad9e8bda60a9aba398462f197f0046..0000000000000000000000000000000000000000 --- a/model-00021-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01aea7d2aa17ec78ade5c4f0c4471bb6b94c5ee42aa394f706651abbb2e9d61e -size 4362142872 diff --git a/model-00022-of-00030.safetensors b/model-00022-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..daaffb298259d602fb21760a0d9cdb2b97e8e705 --- /dev/null +++ b/model-00022-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6ba9c4aaf3e426bafc083fae07777f7c4a714a135b3afa2210381d5341f33d7 +size 4664167408 diff --git a/model-00022-of-00062.safetensors b/model-00022-of-00062.safetensors deleted file mode 100644 index e9f4c964bbfe446f1128769ee16b8f6deee9cef9..0000000000000000000000000000000000000000 --- a/model-00022-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bea04cc81dc12032c266d18fa3082d1d763b2f1aa7d7c24e7d7e0611fb7f8f70 -size 4966188880 diff --git a/model-00023-of-00030.safetensors b/model-00023-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1f354ea5f1eb12fdca653cb019dc7e7f05c9a2d0 --- /dev/null +++ b/model-00023-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d91824a919c7a25f6964716ed2356802becf599215410e2ede8ca319e89cbfe7 +size 4999711728 diff --git a/model-00023-of-00062.safetensors b/model-00023-of-00062.safetensors deleted file mode 100644 index 562d5affa1272305c2aaf374d3dcc63ff26ce4fd..0000000000000000000000000000000000000000 --- a/model-00023-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:669d8fa715990f7b876529f88976fad057511fd064ad8d71245854879258502c -size 4362142872 diff --git a/model-00024-of-00030.safetensors b/model-00024-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2bf168676b9134bf284c2361ef51698f982ce4f6 --- /dev/null +++ b/model-00024-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f3a28d6e89ec98a58e66cce87084ccb2ba0854dba976d62cdf144f0cf8962c0 +size 4966157056 diff --git a/model-00024-of-00062.safetensors b/model-00024-of-00062.safetensors deleted file mode 100644 index 817aa553beedd1bc3e71135dca324926bdc61b15..0000000000000000000000000000000000000000 --- a/model-00024-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0d7d4748d212bfc63c86b7e0b0570f36e80d74a8edf68adfb277c3039b806087 -size 4362142872 diff --git a/model-00025-of-00030.safetensors b/model-00025-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f19afb3f04dad1aa7bac7b72a3416ee89e237b6e --- /dev/null +++ b/model-00025-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7957e5684b4368df1179374dbb78d034e44e5d47dc9ddb07b90274a4d70e219f +size 4664134408 diff --git a/model-00025-of-00062.safetensors b/model-00025-of-00062.safetensors deleted file mode 100644 index e063a2f271537960b31b2e7c75923e1682ecef80..0000000000000000000000000000000000000000 --- a/model-00025-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:dbe649e36c4a6d9e6e08d8635ecc95fd4db3709b1f3081c47c05fb6ec9dceb1e -size 4966188880 diff --git a/model-00026-of-00030.safetensors b/model-00026-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1474dbd569482065bb5d711586d6f9956e74446d --- /dev/null +++ b/model-00026-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936520d664a3446d3419a6d872d2d986f06ffce6e0be29b713a67a9a2fd5c5ea +size 4664167408 diff --git a/model-00026-of-00062.safetensors b/model-00026-of-00062.safetensors deleted file mode 100644 index 069222df1e1aa395fae62b73cb27dfa8939eda95..0000000000000000000000000000000000000000 --- a/model-00026-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32b85f3ef121521ab28114aeac0a80cfe51cb97e2109c747425de1490e6e7a9f -size 4362142872 diff --git a/model-00027-of-00030.safetensors b/model-00027-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf2607f183974027d19b18f06b00b1066d3dacb8 --- /dev/null +++ b/model-00027-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed83c9ee4afcefb547b0dc16df5167aba3e9cd869777796e2124849f95b3938 +size 4664167408 diff --git a/model-00027-of-00062.safetensors b/model-00027-of-00062.safetensors deleted file mode 100644 index 1e8c1e5aae20defe7bc4a2d5ad9a62ca62f70637..0000000000000000000000000000000000000000 --- a/model-00027-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8827a7bedb208b9eddfc61c9566fa4aa2e30f523e758043f6af09b9473038b0 -size 4362142872 diff --git a/model-00028-of-00030.safetensors b/model-00028-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc6854318ab74bbfc5bdd3d6c160805d01363d81 --- /dev/null +++ b/model-00028-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57ae990bae2a4bf165963d0c7963a98e3630fe076f0c236b10115dfc5d26fdef +size 4999711728 diff --git a/model-00028-of-00062.safetensors b/model-00028-of-00062.safetensors deleted file mode 100644 index 561d24e924357339d2b1e59f6a9c659e0b017c51..0000000000000000000000000000000000000000 --- a/model-00028-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:528a35676a03935bf84787c5f00f78ceb2a535a06d06912582e3dbc3defeace5 -size 4966188880 diff --git a/model-00029-of-00030.safetensors b/model-00029-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac9fd28fb4bf5550eea7ada715e5fd2da40fb8a3 --- /dev/null +++ b/model-00029-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaba01984b5adda19cf3843401dd1194e2ef1f545e39744878302ff7e4ccaa9a +size 4966173536 diff --git a/model-00029-of-00062.safetensors b/model-00029-of-00062.safetensors deleted file mode 100644 index a4c52c21eccadd695a8ab5b17d165dd6adcb8ab9..0000000000000000000000000000000000000000 --- a/model-00029-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:89ad36d21a9f03feae0157e9f2b442c5fa88af76b6edfee093872510a8630a36 -size 4362142872 diff --git a/model-00030-of-00030.safetensors b/model-00030-of-00030.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d18f12e607390e6761a82b998b942b68a900bb51 --- /dev/null +++ b/model-00030-of-00030.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af41b85f7363cb73e4c7c193bb4a785c4077b95ec4a3aee2a7c2b14dbf41b59 +size 2101346432 diff --git a/model-00030-of-00062.safetensors b/model-00030-of-00062.safetensors deleted file mode 100644 index 0059cb9250496ce8f337008dff9376464d09f5ae..0000000000000000000000000000000000000000 --- a/model-00030-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:437dd0de8ffca897ef51b1e5c785b980e35ed9041f99680e47ce794c42d7b82e -size 4362142872 diff --git a/model-00031-of-00062.safetensors b/model-00031-of-00062.safetensors deleted file mode 100644 index 7189ed2bfce0c3b048c1903eefb759244f0bd15e..0000000000000000000000000000000000000000 --- a/model-00031-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:22374be7a0d50fd380cf866a2b54e125845682e9573f3af1ca31816b7963e70c -size 4966188880 diff --git a/model-00032-of-00062.safetensors b/model-00032-of-00062.safetensors deleted file mode 100644 index 923c433bad7163d04bb5efa5582477b457c5037c..0000000000000000000000000000000000000000 --- a/model-00032-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c746a7a500f83c5292ab108919f427f9b3ff4abd1b18da3fedda9a64b9739502 -size 4362142872 diff --git a/model-00033-of-00062.safetensors b/model-00033-of-00062.safetensors deleted file mode 100644 index 30a75a491bee48e1c3df424814c56ad3710d0b3e..0000000000000000000000000000000000000000 --- a/model-00033-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a5b0298df8acb7756b39713b24f80b7204a62ccea2ab643deec8a4364efec24 -size 4362142872 diff --git a/model-00034-of-00062.safetensors b/model-00034-of-00062.safetensors deleted file mode 100644 index 08e951fe7e192cf35d8005060fc1a1ffb7e02378..0000000000000000000000000000000000000000 --- a/model-00034-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f81f6480571e9c1212a2272452f2f0a158bd22ea2435fce4d682a8c58e49610 -size 4966188880 diff --git a/model-00035-of-00062.safetensors b/model-00035-of-00062.safetensors deleted file mode 100644 index b674ace9fa1348ef1384ace5788de1b09188fe60..0000000000000000000000000000000000000000 --- a/model-00035-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b6ce3b0acabd9f0ce66cd20ecc757d251d8807d5021486e95d2a5809d1c7628f -size 4362142872 diff --git a/model-00036-of-00062.safetensors b/model-00036-of-00062.safetensors deleted file mode 100644 index f132cb636191d3e2ad096f8ea98964e799572d41..0000000000000000000000000000000000000000 --- a/model-00036-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3e0d933d446504d07bdd2bd551d580c2730c15b1558a13b4153091e0114091d2 -size 4362142872 diff --git a/model-00037-of-00062.safetensors b/model-00037-of-00062.safetensors deleted file mode 100644 index a8a2bde00cac9a7507e178c01e724336f7360572..0000000000000000000000000000000000000000 --- a/model-00037-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3237211aabedc8a0badb62e2b8deeb611cc65f86aa7fe377501335af76c5dd16 -size 4966188880 diff --git a/model-00038-of-00062.safetensors b/model-00038-of-00062.safetensors deleted file mode 100644 index 04406c24569575d83100ea4a288759180219dd2a..0000000000000000000000000000000000000000 --- a/model-00038-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9734e0faad29e45b1332fb3de57bf0cdbb0512c9d71c1401188f2d89fad3ded7 -size 4362142872 diff --git a/model-00039-of-00062.safetensors b/model-00039-of-00062.safetensors deleted file mode 100644 index c7687b1f402070811fbfb7de9d3c357aba295c22..0000000000000000000000000000000000000000 --- a/model-00039-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b9607c9b29e21e9f8f40bed6e422f65d9c331569afa2139639862bed50ce7fd1 -size 4362142872 diff --git a/model-00040-of-00062.safetensors b/model-00040-of-00062.safetensors deleted file mode 100644 index ae0c81501b2cb4ae378c04ea10672534c25c1107..0000000000000000000000000000000000000000 --- a/model-00040-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32802212ac2cf5c5939ad78b8f4f0168a297db70bd6e77047fbb17746e0c1467 -size 4966188880 diff --git a/model-00041-of-00062.safetensors b/model-00041-of-00062.safetensors deleted file mode 100644 index 2f088edfa7ec89c03cf252646755fdac74989373..0000000000000000000000000000000000000000 --- a/model-00041-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:03ac0f4de6715f99cacfce1c7482acb5c1bcbff974dbf2ef515e626f927e9550 -size 4362142872 diff --git a/model-00042-of-00062.safetensors b/model-00042-of-00062.safetensors deleted file mode 100644 index 508d875a16a2800a031894fdded20ecd91eff66b..0000000000000000000000000000000000000000 --- a/model-00042-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cb89a712c280adaef33471c807b7716a1a9ee2c1b31dcf4bd7d3e7009404ae0e -size 4362142872 diff --git a/model-00043-of-00062.safetensors b/model-00043-of-00062.safetensors deleted file mode 100644 index c176b39bc16f5ea78013662ce0ecf5d894238742..0000000000000000000000000000000000000000 --- a/model-00043-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cdf16e5eadcaaeb7cd188daa8e2bea2ca53ad3f61d23b1e05722e78cca10e6dd -size 4966188880 diff --git a/model-00044-of-00062.safetensors b/model-00044-of-00062.safetensors deleted file mode 100644 index fe5b0dd09b5f1780a6cdfafadd1c9fa39e114fd1..0000000000000000000000000000000000000000 --- a/model-00044-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a227fad2d90fad7ae60e3a15ddc30cfdda061e61bff161f82c7aca24c80d25dc -size 4362142872 diff --git a/model-00045-of-00062.safetensors b/model-00045-of-00062.safetensors deleted file mode 100644 index 729e2487c153692826fb2ae4ff0c07a96ae4c162..0000000000000000000000000000000000000000 --- a/model-00045-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d5e1e8a671723086cae8cdfaeac48fe40fb2eb274e0af0e9ba2b0b8b78b2f92 -size 4362142872 diff --git a/model-00046-of-00062.safetensors b/model-00046-of-00062.safetensors deleted file mode 100644 index 8b59a05b0d932da26689c319fc5504e2254c988f..0000000000000000000000000000000000000000 --- a/model-00046-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:daa2bbac3df9e4c0792413232e252b3c2fd399f40fd927a5d6ef505479b59138 -size 4966188880 diff --git a/model-00047-of-00062.safetensors b/model-00047-of-00062.safetensors deleted file mode 100644 index fe714f32292f5a528815a0bead81e15444c715ae..0000000000000000000000000000000000000000 --- a/model-00047-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c9846144ce2b1e59f712ef765201f6dd83afe0240663928602c723e0616cf0b -size 4362142872 diff --git a/model-00048-of-00062.safetensors b/model-00048-of-00062.safetensors deleted file mode 100644 index 089428c22ec6b6544d93cea8b224bdef7c0b9070..0000000000000000000000000000000000000000 --- a/model-00048-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d65d34a41094a477b379bc30c4f4e7770529ad62efadb9d156f4530e7b44b48e -size 4362142872 diff --git a/model-00049-of-00062.safetensors b/model-00049-of-00062.safetensors deleted file mode 100644 index 94c5bee5b5b772fc8898073118f2c1db4333c6b6..0000000000000000000000000000000000000000 --- a/model-00049-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:612c8d6cda7419a3c52988991b7272c88531d8dbfdf6ba29bca9f543e80608be -size 4966188880 diff --git a/model-00050-of-00062.safetensors b/model-00050-of-00062.safetensors deleted file mode 100644 index 1c7432dd34e054e2faf3742089ba824e3a0e6860..0000000000000000000000000000000000000000 --- a/model-00050-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:25571907b74df7e8d618578672546c7051f0f8525fe55d50aee316df887afda0 -size 4362142872 diff --git a/model-00051-of-00062.safetensors b/model-00051-of-00062.safetensors deleted file mode 100644 index be2b19e13d14d059eda52c6e22bf0715b45a8920..0000000000000000000000000000000000000000 --- a/model-00051-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:49460d2d4ef41496404d9b2c79294a5210d40b27f1fa41685fbccb844fdf59f2 -size 4362142872 diff --git a/model-00052-of-00062.safetensors b/model-00052-of-00062.safetensors deleted file mode 100644 index f26e7938fc3fc04cc033461594f6b3311f8279aa..0000000000000000000000000000000000000000 --- a/model-00052-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:841b6b27cb8e485229749b961bdcf2a38dda9bc5cc54b77f3c879b976f40f44b -size 4966188880 diff --git a/model-00053-of-00062.safetensors b/model-00053-of-00062.safetensors deleted file mode 100644 index 801ad6efa0db1f87ec27eca5680b00486ecf7a51..0000000000000000000000000000000000000000 --- a/model-00053-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:321b3ace55a693bf0f7ac3ced8bec21ebd6dd98006581873b540a60d9d63704e -size 4362142872 diff --git a/model-00054-of-00062.safetensors b/model-00054-of-00062.safetensors deleted file mode 100644 index 471f431931fbc6c93da9402ebf360553d13e62bd..0000000000000000000000000000000000000000 --- a/model-00054-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b16ea3b8b0f0dca9fd1b731a0959d2bf8b53cc18dcdc55a82df9b5879a256a82 -size 4362142872 diff --git a/model-00055-of-00062.safetensors b/model-00055-of-00062.safetensors deleted file mode 100644 index 68580f97063274f4d8c7a6d0282a6257c61bdfb1..0000000000000000000000000000000000000000 --- a/model-00055-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4473622331a8223cd7c65ce366d1ee4198d37e0754f6af3e7778bd738b527146 -size 4966188880 diff --git a/model-00056-of-00062.safetensors b/model-00056-of-00062.safetensors deleted file mode 100644 index 59ffa732d7ad2d9dfdd8dcbdb82b70741ef3fb42..0000000000000000000000000000000000000000 --- a/model-00056-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b98febb5eec6d4c98e9918b05bd20fc167e1c1bc407f4508ef27bdfc3cdb8607 -size 4362142872 diff --git a/model-00057-of-00062.safetensors b/model-00057-of-00062.safetensors deleted file mode 100644 index 2d35d03fe202b63b53093a006056609a015af399..0000000000000000000000000000000000000000 --- a/model-00057-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11b21d91d6707412dd835a2e55a46cd10fbab25598904d3b8d042b44440d96e9 -size 4362142872 diff --git a/model-00058-of-00062.safetensors b/model-00058-of-00062.safetensors deleted file mode 100644 index 50de6dcbfcb48f22b4f088e7112a515f8696c151..0000000000000000000000000000000000000000 --- a/model-00058-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3fe665846e782b7d1aa5c136d1814c2624705bd8ebb6cb08998978e24b99048b -size 4966188880 diff --git a/model-00059-of-00062.safetensors b/model-00059-of-00062.safetensors deleted file mode 100644 index afba7b8059b7b04a4b08c16115a9f5999bcd6ba1..0000000000000000000000000000000000000000 --- a/model-00059-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:793c2816c80ca9ef3df1b799be61af4a4d72238c64c20f1cdf9d778eef718295 -size 4362142872 diff --git a/model-00060-of-00062.safetensors b/model-00060-of-00062.safetensors deleted file mode 100644 index c992a90ba4e693f70d54801e37fab43a72b52764..0000000000000000000000000000000000000000 --- a/model-00060-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:846fd3336277eb841c88eb797c29928c810a931a6114dce8503b28352487b0f6 -size 4362142872 diff --git a/model-00061-of-00062.safetensors b/model-00061-of-00062.safetensors deleted file mode 100644 index 330f2f57ef54ae3ee830d450f9b60bae32a7e046..0000000000000000000000000000000000000000 --- a/model-00061-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8a84e91bf4d02c4decb43ad3fe668aaf550a26870cccfe64c1406cc753dc5707 -size 4362241496 diff --git a/model-00062-of-00062.safetensors b/model-00062-of-00062.safetensors deleted file mode 100644 index 4165aac25628e001e71ac48ffa82a27282772a6b..0000000000000000000000000000000000000000 --- a/model-00062-of-00062.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd33cede2c0f12b482393639160b7e897941ab08d724eecd875202238a4194d2 -size 4202692736 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index 8aab69e95f0d7da106bad45a4e7c35897ecf3d80..37b1afe63cadc4ddce30aaff1b149c2f3083650c 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -1,730 +1,730 @@ { "metadata": { - "total_size": 282214825984 + "total_size": 141107412992 }, "weight_map": { - "lm_head.weight": "model-00062-of-00062.safetensors", - "model.embed_tokens.weight": "model-00001-of-00062.safetensors", - "model.layers.0.input_layernorm.weight": "model-00002-of-00062.safetensors", - "model.layers.0.mlp.down_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.0.mlp.gate_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.0.mlp.up_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00062.safetensors", - "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00062.safetensors", - "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00062.safetensors", - "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00062.safetensors", - "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00062.safetensors", - "model.layers.1.input_layernorm.weight": "model-00003-of-00062.safetensors", - "model.layers.1.mlp.down_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.1.mlp.gate_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.1.mlp.up_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.1.post_attention_layernorm.weight": "model-00003-of-00062.safetensors", - "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00062.safetensors", - "model.layers.10.input_layernorm.weight": "model-00010-of-00062.safetensors", - "model.layers.10.mlp.down_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.10.mlp.gate_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.10.mlp.up_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.10.post_attention_layernorm.weight": "model-00010-of-00062.safetensors", - "model.layers.10.self_attn.k_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.10.self_attn.o_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.10.self_attn.q_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.10.self_attn.v_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.11.input_layernorm.weight": "model-00010-of-00062.safetensors", - "model.layers.11.mlp.down_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.11.mlp.gate_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.11.mlp.up_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.11.post_attention_layernorm.weight": "model-00010-of-00062.safetensors", - "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.12.input_layernorm.weight": "model-00011-of-00062.safetensors", - "model.layers.12.mlp.down_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.12.mlp.gate_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.12.mlp.up_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.12.post_attention_layernorm.weight": "model-00011-of-00062.safetensors", - "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.12.self_attn.q_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00062.safetensors", - "model.layers.13.input_layernorm.weight": "model-00012-of-00062.safetensors", - "model.layers.13.mlp.down_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.13.mlp.gate_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.13.mlp.up_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.13.post_attention_layernorm.weight": "model-00012-of-00062.safetensors", - "model.layers.13.self_attn.k_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.13.self_attn.o_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.13.self_attn.q_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.13.self_attn.v_proj.weight": "model-00011-of-00062.safetensors", - "model.layers.14.input_layernorm.weight": "model-00013-of-00062.safetensors", - "model.layers.14.mlp.down_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.14.mlp.gate_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.14.mlp.up_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.14.post_attention_layernorm.weight": "model-00013-of-00062.safetensors", - "model.layers.14.self_attn.k_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.14.self_attn.o_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.14.self_attn.q_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.14.self_attn.v_proj.weight": "model-00012-of-00062.safetensors", - "model.layers.15.input_layernorm.weight": "model-00013-of-00062.safetensors", - "model.layers.15.mlp.down_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.15.mlp.gate_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.15.mlp.up_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.15.post_attention_layernorm.weight": "model-00013-of-00062.safetensors", - "model.layers.15.self_attn.k_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.15.self_attn.o_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.15.self_attn.q_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.15.self_attn.v_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.16.input_layernorm.weight": "model-00014-of-00062.safetensors", - "model.layers.16.mlp.down_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.16.mlp.gate_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.16.mlp.up_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.16.post_attention_layernorm.weight": "model-00014-of-00062.safetensors", - "model.layers.16.self_attn.k_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.16.self_attn.o_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.16.self_attn.q_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.16.self_attn.v_proj.weight": "model-00013-of-00062.safetensors", - "model.layers.17.input_layernorm.weight": "model-00015-of-00062.safetensors", - "model.layers.17.mlp.down_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.17.mlp.gate_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.17.mlp.up_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.17.post_attention_layernorm.weight": "model-00015-of-00062.safetensors", - "model.layers.17.self_attn.k_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.17.self_attn.o_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.17.self_attn.q_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.17.self_attn.v_proj.weight": "model-00014-of-00062.safetensors", - "model.layers.18.input_layernorm.weight": "model-00016-of-00062.safetensors", - "model.layers.18.mlp.down_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.18.mlp.gate_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.18.mlp.up_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.18.post_attention_layernorm.weight": "model-00016-of-00062.safetensors", - "model.layers.18.self_attn.k_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.18.self_attn.o_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.18.self_attn.q_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.18.self_attn.v_proj.weight": "model-00015-of-00062.safetensors", - "model.layers.19.input_layernorm.weight": "model-00016-of-00062.safetensors", - "model.layers.19.mlp.down_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.19.mlp.gate_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.19.mlp.up_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.19.post_attention_layernorm.weight": "model-00016-of-00062.safetensors", - "model.layers.19.self_attn.k_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.19.self_attn.o_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.19.self_attn.q_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.19.self_attn.v_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.2.input_layernorm.weight": "model-00004-of-00062.safetensors", - "model.layers.2.mlp.down_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.2.mlp.gate_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.2.mlp.up_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.2.post_attention_layernorm.weight": "model-00004-of-00062.safetensors", - "model.layers.2.self_attn.k_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.2.self_attn.o_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.2.self_attn.q_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.2.self_attn.v_proj.weight": "model-00003-of-00062.safetensors", - "model.layers.20.input_layernorm.weight": "model-00017-of-00062.safetensors", - "model.layers.20.mlp.down_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.20.mlp.gate_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.20.mlp.up_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.20.post_attention_layernorm.weight": "model-00017-of-00062.safetensors", - "model.layers.20.self_attn.k_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.20.self_attn.o_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.20.self_attn.q_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.20.self_attn.v_proj.weight": "model-00016-of-00062.safetensors", - "model.layers.21.input_layernorm.weight": "model-00018-of-00062.safetensors", - "model.layers.21.mlp.down_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.21.mlp.gate_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.21.mlp.up_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.21.post_attention_layernorm.weight": "model-00018-of-00062.safetensors", - "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.21.self_attn.o_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.21.self_attn.v_proj.weight": "model-00017-of-00062.safetensors", - "model.layers.22.input_layernorm.weight": "model-00019-of-00062.safetensors", - "model.layers.22.mlp.down_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.22.mlp.up_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.22.post_attention_layernorm.weight": "model-00019-of-00062.safetensors", - "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00062.safetensors", - "model.layers.23.input_layernorm.weight": "model-00019-of-00062.safetensors", - "model.layers.23.mlp.down_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.23.mlp.up_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.23.post_attention_layernorm.weight": "model-00019-of-00062.safetensors", - "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.24.input_layernorm.weight": "model-00020-of-00062.safetensors", - "model.layers.24.mlp.down_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.24.mlp.up_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.24.post_attention_layernorm.weight": "model-00020-of-00062.safetensors", - "model.layers.24.self_attn.k_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.24.self_attn.o_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.24.self_attn.q_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.24.self_attn.v_proj.weight": "model-00019-of-00062.safetensors", - "model.layers.25.input_layernorm.weight": "model-00021-of-00062.safetensors", - "model.layers.25.mlp.down_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.25.mlp.gate_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.25.mlp.up_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.25.post_attention_layernorm.weight": "model-00021-of-00062.safetensors", - "model.layers.25.self_attn.k_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.25.self_attn.o_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.25.self_attn.q_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.25.self_attn.v_proj.weight": "model-00020-of-00062.safetensors", - "model.layers.26.input_layernorm.weight": "model-00022-of-00062.safetensors", - "model.layers.26.mlp.down_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.26.mlp.gate_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.26.mlp.up_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.26.post_attention_layernorm.weight": "model-00022-of-00062.safetensors", - "model.layers.26.self_attn.k_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.26.self_attn.o_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.26.self_attn.q_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.26.self_attn.v_proj.weight": "model-00021-of-00062.safetensors", - "model.layers.27.input_layernorm.weight": "model-00022-of-00062.safetensors", - "model.layers.27.mlp.down_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.27.mlp.gate_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.27.mlp.up_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.27.post_attention_layernorm.weight": "model-00022-of-00062.safetensors", - "model.layers.27.self_attn.k_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.27.self_attn.o_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.27.self_attn.q_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.27.self_attn.v_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.28.input_layernorm.weight": "model-00023-of-00062.safetensors", - "model.layers.28.mlp.down_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.28.mlp.gate_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.28.mlp.up_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.28.post_attention_layernorm.weight": "model-00023-of-00062.safetensors", - "model.layers.28.self_attn.k_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.28.self_attn.o_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.28.self_attn.q_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.28.self_attn.v_proj.weight": "model-00022-of-00062.safetensors", - "model.layers.29.input_layernorm.weight": "model-00024-of-00062.safetensors", - "model.layers.29.mlp.down_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.29.mlp.gate_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.29.mlp.up_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.29.post_attention_layernorm.weight": "model-00024-of-00062.safetensors", - "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.29.self_attn.o_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.29.self_attn.v_proj.weight": "model-00023-of-00062.safetensors", - "model.layers.3.input_layernorm.weight": "model-00004-of-00062.safetensors", - "model.layers.3.mlp.down_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.3.mlp.gate_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.3.mlp.up_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.3.post_attention_layernorm.weight": "model-00004-of-00062.safetensors", - "model.layers.3.self_attn.k_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.3.self_attn.o_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.3.self_attn.q_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.3.self_attn.v_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.30.input_layernorm.weight": "model-00025-of-00062.safetensors", - "model.layers.30.mlp.down_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.30.mlp.gate_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.30.mlp.up_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.30.post_attention_layernorm.weight": "model-00025-of-00062.safetensors", - "model.layers.30.self_attn.k_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.30.self_attn.o_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.30.self_attn.q_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.30.self_attn.v_proj.weight": "model-00024-of-00062.safetensors", - "model.layers.31.input_layernorm.weight": "model-00025-of-00062.safetensors", - "model.layers.31.mlp.down_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.31.mlp.gate_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.31.mlp.up_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.31.post_attention_layernorm.weight": "model-00025-of-00062.safetensors", - "model.layers.31.self_attn.k_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.31.self_attn.o_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.31.self_attn.q_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.31.self_attn.v_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.32.input_layernorm.weight": "model-00026-of-00062.safetensors", - "model.layers.32.mlp.down_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.32.mlp.gate_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.32.mlp.up_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.32.post_attention_layernorm.weight": "model-00026-of-00062.safetensors", - "model.layers.32.self_attn.k_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.32.self_attn.o_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.32.self_attn.q_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.32.self_attn.v_proj.weight": "model-00025-of-00062.safetensors", - "model.layers.33.input_layernorm.weight": "model-00027-of-00062.safetensors", - "model.layers.33.mlp.down_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.33.mlp.gate_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.33.mlp.up_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.33.post_attention_layernorm.weight": "model-00027-of-00062.safetensors", - "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.33.self_attn.o_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.33.self_attn.v_proj.weight": "model-00026-of-00062.safetensors", - "model.layers.34.input_layernorm.weight": "model-00028-of-00062.safetensors", - "model.layers.34.mlp.down_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.34.mlp.gate_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.34.mlp.up_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.34.post_attention_layernorm.weight": "model-00028-of-00062.safetensors", - "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00062.safetensors", - "model.layers.35.input_layernorm.weight": "model-00028-of-00062.safetensors", - "model.layers.35.mlp.down_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.35.mlp.gate_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.35.mlp.up_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.35.post_attention_layernorm.weight": "model-00028-of-00062.safetensors", - "model.layers.35.self_attn.k_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.35.self_attn.q_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.35.self_attn.v_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.36.input_layernorm.weight": "model-00029-of-00062.safetensors", - "model.layers.36.mlp.down_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.36.mlp.gate_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.36.mlp.up_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.36.post_attention_layernorm.weight": "model-00029-of-00062.safetensors", - "model.layers.36.self_attn.k_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.36.self_attn.o_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.36.self_attn.q_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.36.self_attn.v_proj.weight": "model-00028-of-00062.safetensors", - "model.layers.37.input_layernorm.weight": "model-00030-of-00062.safetensors", - "model.layers.37.mlp.down_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.37.mlp.gate_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.37.mlp.up_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.37.post_attention_layernorm.weight": "model-00030-of-00062.safetensors", - "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.37.self_attn.o_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.37.self_attn.v_proj.weight": "model-00029-of-00062.safetensors", - "model.layers.38.input_layernorm.weight": "model-00031-of-00062.safetensors", - "model.layers.38.mlp.down_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.38.mlp.gate_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.38.mlp.up_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.38.post_attention_layernorm.weight": "model-00031-of-00062.safetensors", - "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00062.safetensors", - "model.layers.39.input_layernorm.weight": "model-00031-of-00062.safetensors", - "model.layers.39.mlp.down_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.39.mlp.gate_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.39.mlp.up_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.39.post_attention_layernorm.weight": "model-00031-of-00062.safetensors", - "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.4.input_layernorm.weight": "model-00005-of-00062.safetensors", - "model.layers.4.mlp.down_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.4.mlp.gate_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.4.mlp.up_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.4.post_attention_layernorm.weight": "model-00005-of-00062.safetensors", - "model.layers.4.self_attn.k_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.4.self_attn.o_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.4.self_attn.q_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.4.self_attn.v_proj.weight": "model-00004-of-00062.safetensors", - "model.layers.40.input_layernorm.weight": "model-00032-of-00062.safetensors", - "model.layers.40.mlp.down_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.40.mlp.gate_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.40.mlp.up_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.40.post_attention_layernorm.weight": "model-00032-of-00062.safetensors", - "model.layers.40.self_attn.k_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.40.self_attn.o_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.40.self_attn.q_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.40.self_attn.v_proj.weight": "model-00031-of-00062.safetensors", - "model.layers.41.input_layernorm.weight": "model-00033-of-00062.safetensors", - "model.layers.41.mlp.down_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.41.mlp.gate_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.41.mlp.up_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.41.post_attention_layernorm.weight": "model-00033-of-00062.safetensors", - "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.41.self_attn.o_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.41.self_attn.v_proj.weight": "model-00032-of-00062.safetensors", - "model.layers.42.input_layernorm.weight": "model-00034-of-00062.safetensors", - "model.layers.42.mlp.down_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.42.mlp.gate_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.42.mlp.up_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.42.post_attention_layernorm.weight": "model-00034-of-00062.safetensors", - "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00062.safetensors", - "model.layers.43.input_layernorm.weight": "model-00034-of-00062.safetensors", - "model.layers.43.mlp.down_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.43.mlp.gate_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.43.mlp.up_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.43.post_attention_layernorm.weight": "model-00034-of-00062.safetensors", - "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.44.input_layernorm.weight": "model-00035-of-00062.safetensors", - "model.layers.44.mlp.down_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.44.mlp.gate_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.44.mlp.up_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.44.post_attention_layernorm.weight": "model-00035-of-00062.safetensors", - "model.layers.44.self_attn.k_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.44.self_attn.o_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.44.self_attn.q_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.44.self_attn.v_proj.weight": "model-00034-of-00062.safetensors", - "model.layers.45.input_layernorm.weight": "model-00036-of-00062.safetensors", - "model.layers.45.mlp.down_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.45.mlp.gate_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.45.mlp.up_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.45.post_attention_layernorm.weight": "model-00036-of-00062.safetensors", - "model.layers.45.self_attn.k_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.45.self_attn.o_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.45.self_attn.q_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.45.self_attn.v_proj.weight": "model-00035-of-00062.safetensors", - "model.layers.46.input_layernorm.weight": "model-00037-of-00062.safetensors", - "model.layers.46.mlp.down_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.46.mlp.gate_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.46.mlp.up_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.46.post_attention_layernorm.weight": "model-00037-of-00062.safetensors", - "model.layers.46.self_attn.k_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.46.self_attn.o_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.46.self_attn.q_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.46.self_attn.v_proj.weight": "model-00036-of-00062.safetensors", - "model.layers.47.input_layernorm.weight": "model-00037-of-00062.safetensors", - "model.layers.47.mlp.down_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.47.mlp.gate_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.47.mlp.up_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.47.post_attention_layernorm.weight": "model-00037-of-00062.safetensors", - "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.48.input_layernorm.weight": "model-00038-of-00062.safetensors", - "model.layers.48.mlp.down_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.48.mlp.gate_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.48.mlp.up_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.48.post_attention_layernorm.weight": "model-00038-of-00062.safetensors", - "model.layers.48.self_attn.k_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.48.self_attn.o_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.48.self_attn.q_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.48.self_attn.v_proj.weight": "model-00037-of-00062.safetensors", - "model.layers.49.input_layernorm.weight": "model-00039-of-00062.safetensors", - "model.layers.49.mlp.down_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.49.mlp.gate_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.49.mlp.up_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.49.post_attention_layernorm.weight": "model-00039-of-00062.safetensors", - "model.layers.49.self_attn.k_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.49.self_attn.o_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.49.self_attn.q_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.49.self_attn.v_proj.weight": "model-00038-of-00062.safetensors", - "model.layers.5.input_layernorm.weight": "model-00006-of-00062.safetensors", - "model.layers.5.mlp.down_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.5.mlp.gate_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.5.mlp.up_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.5.post_attention_layernorm.weight": "model-00006-of-00062.safetensors", - "model.layers.5.self_attn.k_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.5.self_attn.o_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.5.self_attn.q_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.5.self_attn.v_proj.weight": "model-00005-of-00062.safetensors", - "model.layers.50.input_layernorm.weight": "model-00040-of-00062.safetensors", - "model.layers.50.mlp.down_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.50.mlp.gate_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.50.mlp.up_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.50.post_attention_layernorm.weight": "model-00040-of-00062.safetensors", - "model.layers.50.self_attn.k_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.50.self_attn.o_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.50.self_attn.q_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.50.self_attn.v_proj.weight": "model-00039-of-00062.safetensors", - "model.layers.51.input_layernorm.weight": "model-00040-of-00062.safetensors", - "model.layers.51.mlp.down_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.51.mlp.gate_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.51.mlp.up_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.51.post_attention_layernorm.weight": "model-00040-of-00062.safetensors", - "model.layers.51.self_attn.k_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.51.self_attn.o_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.51.self_attn.q_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.51.self_attn.v_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.52.input_layernorm.weight": "model-00041-of-00062.safetensors", - "model.layers.52.mlp.down_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.52.mlp.gate_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.52.mlp.up_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.52.post_attention_layernorm.weight": "model-00041-of-00062.safetensors", - "model.layers.52.self_attn.k_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.52.self_attn.o_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.52.self_attn.q_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.52.self_attn.v_proj.weight": "model-00040-of-00062.safetensors", - "model.layers.53.input_layernorm.weight": "model-00042-of-00062.safetensors", - "model.layers.53.mlp.down_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.53.mlp.gate_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.53.mlp.up_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.53.post_attention_layernorm.weight": "model-00042-of-00062.safetensors", - "model.layers.53.self_attn.k_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.53.self_attn.o_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.53.self_attn.q_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.53.self_attn.v_proj.weight": "model-00041-of-00062.safetensors", - "model.layers.54.input_layernorm.weight": "model-00043-of-00062.safetensors", - "model.layers.54.mlp.down_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.54.mlp.gate_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.54.mlp.up_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.54.post_attention_layernorm.weight": "model-00043-of-00062.safetensors", - "model.layers.54.self_attn.k_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.54.self_attn.o_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.54.self_attn.q_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.54.self_attn.v_proj.weight": "model-00042-of-00062.safetensors", - "model.layers.55.input_layernorm.weight": "model-00043-of-00062.safetensors", - "model.layers.55.mlp.down_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.55.mlp.gate_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.55.mlp.up_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.55.post_attention_layernorm.weight": "model-00043-of-00062.safetensors", - "model.layers.55.self_attn.k_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.55.self_attn.o_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.55.self_attn.q_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.55.self_attn.v_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.56.input_layernorm.weight": "model-00044-of-00062.safetensors", - "model.layers.56.mlp.down_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.56.mlp.gate_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.56.mlp.up_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.56.post_attention_layernorm.weight": "model-00044-of-00062.safetensors", - "model.layers.56.self_attn.k_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.56.self_attn.o_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.56.self_attn.q_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.56.self_attn.v_proj.weight": "model-00043-of-00062.safetensors", - "model.layers.57.input_layernorm.weight": "model-00045-of-00062.safetensors", - "model.layers.57.mlp.down_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.57.mlp.gate_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.57.mlp.up_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.57.post_attention_layernorm.weight": "model-00045-of-00062.safetensors", - "model.layers.57.self_attn.k_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.57.self_attn.o_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.57.self_attn.q_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.57.self_attn.v_proj.weight": "model-00044-of-00062.safetensors", - "model.layers.58.input_layernorm.weight": "model-00046-of-00062.safetensors", - "model.layers.58.mlp.down_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.58.mlp.gate_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.58.mlp.up_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.58.post_attention_layernorm.weight": "model-00046-of-00062.safetensors", - "model.layers.58.self_attn.k_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.58.self_attn.o_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.58.self_attn.q_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.58.self_attn.v_proj.weight": "model-00045-of-00062.safetensors", - "model.layers.59.input_layernorm.weight": "model-00046-of-00062.safetensors", - "model.layers.59.mlp.down_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.59.mlp.gate_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.59.mlp.up_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.59.post_attention_layernorm.weight": "model-00046-of-00062.safetensors", - "model.layers.59.self_attn.k_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.59.self_attn.o_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.59.self_attn.q_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.59.self_attn.v_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.6.input_layernorm.weight": "model-00007-of-00062.safetensors", - "model.layers.6.mlp.down_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.6.mlp.gate_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.6.mlp.up_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.6.post_attention_layernorm.weight": "model-00007-of-00062.safetensors", - "model.layers.6.self_attn.k_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.6.self_attn.o_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.6.self_attn.q_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.6.self_attn.v_proj.weight": "model-00006-of-00062.safetensors", - "model.layers.60.input_layernorm.weight": "model-00047-of-00062.safetensors", - "model.layers.60.mlp.down_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.60.mlp.gate_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.60.mlp.up_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.60.post_attention_layernorm.weight": "model-00047-of-00062.safetensors", - "model.layers.60.self_attn.k_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.60.self_attn.o_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.60.self_attn.q_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.60.self_attn.v_proj.weight": "model-00046-of-00062.safetensors", - "model.layers.61.input_layernorm.weight": "model-00048-of-00062.safetensors", - "model.layers.61.mlp.down_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.61.mlp.gate_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.61.mlp.up_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.61.post_attention_layernorm.weight": "model-00048-of-00062.safetensors", - "model.layers.61.self_attn.k_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.61.self_attn.o_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.61.self_attn.q_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.61.self_attn.v_proj.weight": "model-00047-of-00062.safetensors", - "model.layers.62.input_layernorm.weight": "model-00049-of-00062.safetensors", - "model.layers.62.mlp.down_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.62.mlp.gate_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.62.mlp.up_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.62.post_attention_layernorm.weight": "model-00049-of-00062.safetensors", - "model.layers.62.self_attn.k_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.62.self_attn.o_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.62.self_attn.q_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.62.self_attn.v_proj.weight": "model-00048-of-00062.safetensors", - "model.layers.63.input_layernorm.weight": "model-00049-of-00062.safetensors", - "model.layers.63.mlp.down_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.63.mlp.gate_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.63.mlp.up_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.63.post_attention_layernorm.weight": "model-00049-of-00062.safetensors", - "model.layers.63.self_attn.k_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.63.self_attn.o_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.63.self_attn.q_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.63.self_attn.v_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.64.input_layernorm.weight": "model-00050-of-00062.safetensors", - "model.layers.64.mlp.down_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.64.mlp.gate_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.64.mlp.up_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.64.post_attention_layernorm.weight": "model-00050-of-00062.safetensors", - "model.layers.64.self_attn.k_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.64.self_attn.o_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.64.self_attn.q_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.64.self_attn.v_proj.weight": "model-00049-of-00062.safetensors", - "model.layers.65.input_layernorm.weight": "model-00051-of-00062.safetensors", - "model.layers.65.mlp.down_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.65.mlp.gate_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.65.mlp.up_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.65.post_attention_layernorm.weight": "model-00051-of-00062.safetensors", - "model.layers.65.self_attn.k_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.65.self_attn.o_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.65.self_attn.q_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.65.self_attn.v_proj.weight": "model-00050-of-00062.safetensors", - "model.layers.66.input_layernorm.weight": "model-00052-of-00062.safetensors", - "model.layers.66.mlp.down_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.66.mlp.gate_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.66.mlp.up_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.66.post_attention_layernorm.weight": "model-00052-of-00062.safetensors", - "model.layers.66.self_attn.k_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.66.self_attn.o_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.66.self_attn.q_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.66.self_attn.v_proj.weight": "model-00051-of-00062.safetensors", - "model.layers.67.input_layernorm.weight": "model-00052-of-00062.safetensors", - "model.layers.67.mlp.down_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.67.mlp.gate_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.67.mlp.up_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.67.post_attention_layernorm.weight": "model-00052-of-00062.safetensors", - "model.layers.67.self_attn.k_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.67.self_attn.o_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.67.self_attn.q_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.67.self_attn.v_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.68.input_layernorm.weight": "model-00053-of-00062.safetensors", - "model.layers.68.mlp.down_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.68.mlp.gate_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.68.mlp.up_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.68.post_attention_layernorm.weight": "model-00053-of-00062.safetensors", - "model.layers.68.self_attn.k_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.68.self_attn.o_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.68.self_attn.q_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.68.self_attn.v_proj.weight": "model-00052-of-00062.safetensors", - "model.layers.69.input_layernorm.weight": "model-00054-of-00062.safetensors", - "model.layers.69.mlp.down_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.69.mlp.gate_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.69.mlp.up_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.69.post_attention_layernorm.weight": "model-00054-of-00062.safetensors", - "model.layers.69.self_attn.k_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.69.self_attn.o_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.69.self_attn.q_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.69.self_attn.v_proj.weight": "model-00053-of-00062.safetensors", - "model.layers.7.input_layernorm.weight": "model-00007-of-00062.safetensors", - "model.layers.7.mlp.down_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.7.mlp.gate_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.7.mlp.up_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.7.post_attention_layernorm.weight": "model-00007-of-00062.safetensors", - "model.layers.7.self_attn.k_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.7.self_attn.o_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.7.self_attn.q_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.7.self_attn.v_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.70.input_layernorm.weight": "model-00055-of-00062.safetensors", - "model.layers.70.mlp.down_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.70.mlp.gate_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.70.mlp.up_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.70.post_attention_layernorm.weight": "model-00055-of-00062.safetensors", - "model.layers.70.self_attn.k_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.70.self_attn.o_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.70.self_attn.q_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.70.self_attn.v_proj.weight": "model-00054-of-00062.safetensors", - "model.layers.71.input_layernorm.weight": "model-00055-of-00062.safetensors", - "model.layers.71.mlp.down_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.71.mlp.gate_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.71.mlp.up_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.71.post_attention_layernorm.weight": "model-00055-of-00062.safetensors", - "model.layers.71.self_attn.k_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.71.self_attn.o_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.71.self_attn.q_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.71.self_attn.v_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.72.input_layernorm.weight": "model-00056-of-00062.safetensors", - "model.layers.72.mlp.down_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.72.mlp.gate_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.72.mlp.up_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.72.post_attention_layernorm.weight": "model-00056-of-00062.safetensors", - "model.layers.72.self_attn.k_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.72.self_attn.o_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.72.self_attn.q_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.72.self_attn.v_proj.weight": "model-00055-of-00062.safetensors", - "model.layers.73.input_layernorm.weight": "model-00057-of-00062.safetensors", - "model.layers.73.mlp.down_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.73.mlp.gate_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.73.mlp.up_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.73.post_attention_layernorm.weight": "model-00057-of-00062.safetensors", - "model.layers.73.self_attn.k_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.73.self_attn.o_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.73.self_attn.q_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.73.self_attn.v_proj.weight": "model-00056-of-00062.safetensors", - "model.layers.74.input_layernorm.weight": "model-00058-of-00062.safetensors", - "model.layers.74.mlp.down_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.74.mlp.gate_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.74.mlp.up_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.74.post_attention_layernorm.weight": "model-00058-of-00062.safetensors", - "model.layers.74.self_attn.k_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.74.self_attn.o_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.74.self_attn.q_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.74.self_attn.v_proj.weight": "model-00057-of-00062.safetensors", - "model.layers.75.input_layernorm.weight": "model-00058-of-00062.safetensors", - "model.layers.75.mlp.down_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.75.mlp.gate_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.75.mlp.up_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.75.post_attention_layernorm.weight": "model-00058-of-00062.safetensors", - "model.layers.75.self_attn.k_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.75.self_attn.o_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.75.self_attn.q_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.75.self_attn.v_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.76.input_layernorm.weight": "model-00059-of-00062.safetensors", - "model.layers.76.mlp.down_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.76.mlp.gate_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.76.mlp.up_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.76.post_attention_layernorm.weight": "model-00059-of-00062.safetensors", - "model.layers.76.self_attn.k_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.76.self_attn.o_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.76.self_attn.q_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.76.self_attn.v_proj.weight": "model-00058-of-00062.safetensors", - "model.layers.77.input_layernorm.weight": "model-00060-of-00062.safetensors", - "model.layers.77.mlp.down_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.77.mlp.gate_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.77.mlp.up_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.77.post_attention_layernorm.weight": "model-00060-of-00062.safetensors", - "model.layers.77.self_attn.k_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.77.self_attn.o_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.77.self_attn.q_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.77.self_attn.v_proj.weight": "model-00059-of-00062.safetensors", - "model.layers.78.input_layernorm.weight": "model-00061-of-00062.safetensors", - "model.layers.78.mlp.down_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.78.mlp.gate_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.78.mlp.up_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.78.post_attention_layernorm.weight": "model-00061-of-00062.safetensors", - "model.layers.78.self_attn.k_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.78.self_attn.o_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.78.self_attn.q_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.78.self_attn.v_proj.weight": "model-00060-of-00062.safetensors", - "model.layers.79.input_layernorm.weight": "model-00061-of-00062.safetensors", - "model.layers.79.mlp.down_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.79.mlp.gate_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.79.mlp.up_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.79.post_attention_layernorm.weight": "model-00061-of-00062.safetensors", - "model.layers.79.self_attn.k_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.79.self_attn.o_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.79.self_attn.q_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.79.self_attn.v_proj.weight": "model-00061-of-00062.safetensors", - "model.layers.8.input_layernorm.weight": "model-00008-of-00062.safetensors", - "model.layers.8.mlp.down_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.8.mlp.gate_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.8.mlp.up_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.8.post_attention_layernorm.weight": "model-00008-of-00062.safetensors", - "model.layers.8.self_attn.k_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.8.self_attn.o_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.8.self_attn.q_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.8.self_attn.v_proj.weight": "model-00007-of-00062.safetensors", - "model.layers.9.input_layernorm.weight": "model-00009-of-00062.safetensors", - "model.layers.9.mlp.down_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.9.mlp.gate_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.9.mlp.up_proj.weight": "model-00009-of-00062.safetensors", - "model.layers.9.post_attention_layernorm.weight": "model-00009-of-00062.safetensors", - "model.layers.9.self_attn.k_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.9.self_attn.o_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.9.self_attn.q_proj.weight": "model-00008-of-00062.safetensors", - "model.layers.9.self_attn.v_proj.weight": "model-00008-of-00062.safetensors", - "model.norm.weight": "model-00061-of-00062.safetensors" + "lm_head.weight": "model-00030-of-00030.safetensors", + "model.embed_tokens.weight": "model-00001-of-00030.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00030.safetensors", + "model.layers.10.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.input_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00005-of-00030.safetensors", + "model.layers.13.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.input_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00006-of-00030.safetensors", + "model.layers.16.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.input_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00007-of-00030.safetensors", + "model.layers.19.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.20.input_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00008-of-00030.safetensors", + "model.layers.22.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.input_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00009-of-00030.safetensors", + "model.layers.24.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.input_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00030.safetensors", + "model.layers.27.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.input_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00011-of-00030.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.30.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.input_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00012-of-00030.safetensors", + "model.layers.33.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.input_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00013-of-00030.safetensors", + "model.layers.36.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.input_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00014-of-00030.safetensors", + "model.layers.38.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.input_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00030.safetensors", + "model.layers.40.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00015-of-00030.safetensors", + "model.layers.41.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.input_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00016-of-00030.safetensors", + "model.layers.44.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.input_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00017-of-00030.safetensors", + "model.layers.47.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.input_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00018-of-00030.safetensors", + "model.layers.5.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.50.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.input_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00019-of-00030.safetensors", + "model.layers.52.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.input_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00020-of-00030.safetensors", + "model.layers.55.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.input_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00021-of-00030.safetensors", + "model.layers.58.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.input_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.6.input_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.60.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00022-of-00030.safetensors", + "model.layers.61.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.input_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00023-of-00030.safetensors", + "model.layers.64.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.input_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00024-of-00030.safetensors", + "model.layers.66.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.input_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00025-of-00030.safetensors", + "model.layers.69.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.7.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00030.safetensors", + "model.layers.70.input_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00026-of-00030.safetensors", + "model.layers.72.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.input_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00027-of-00030.safetensors", + "model.layers.75.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.input_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00028-of-00030.safetensors", + "model.layers.78.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.input_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00029-of-00030.safetensors", + "model.layers.8.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.input_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00030.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00030.safetensors", + "model.norm.weight": "model-00029-of-00030.safetensors" } }