diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..75ab598d3462bccdc8c3568ff516d2b220e9fe37 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "gghfez/Mistral-Large-2407-LongCoT", + "architectures": [ + "MistralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 12288, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 131072, + "model_type": "mistral", + "num_attention_heads": 96, + "num_hidden_layers": 88, + "num_key_value_heads": 8, + "pad_token_id": 750, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.47.0", + "unsloth_version": "2024.12.4", + "use_cache": true, + "vocab_size": 32768 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..75b9eec82cf7eed550ca1e7f50231f0b01c9abfa --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "max_length": 131072, + "pad_token_id": 750, + "transformers_version": "4.47.0" +} diff --git a/model-00001-of-00051.safetensors b/model-00001-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3195931dc0d29e4d7fdea1d17ddf80e2baabb0b --- /dev/null +++ b/model-00001-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76ae43e4fcc43649629ecc352df540ddc8bcde2ab583e2e6d8e4821fd51964c0 +size 4932552432 diff --git a/model-00002-of-00051.safetensors b/model-00002-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fbd9bc2327ef64e4b5541d202c2d43ecf5e293f --- /dev/null +++ b/model-00002-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed13c16428d2e59b2312e08f66582fbb2f2af163d43cf6a21a9577d9f236dcbf +size 4831938528 diff --git a/model-00003-of-00051.safetensors b/model-00003-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b5f17beb4edd5cbda2a1972c2d479f65182a0ff7 --- /dev/null +++ b/model-00003-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b880e10b931bb64c50595e5e8d0e60a27e22fde2e7dc5f4fa9ac643e68490b25 +size 4882269816 diff --git a/model-00004-of-00051.safetensors b/model-00004-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cca316adfb47ff6075f3a6c21f08592e182a81b --- /dev/null +++ b/model-00004-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aaf1e5e5d294c42f71321bf055ec5991ad43ee328a12263ae70cba8162fe91e +size 4831889136 diff --git a/model-00005-of-00051.safetensors b/model-00005-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f509699904bc1649b65fff3ddd8d6331125cfba2 --- /dev/null +++ b/model-00005-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a97adc5969773fcc22db6aa8326a77fc4155936abe13c886181b9d49ccde5b6f +size 4831938520 diff --git a/model-00006-of-00051.safetensors b/model-00006-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b245023f709404f551d915230f7e37cd426bad4d --- /dev/null +++ b/model-00006-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8c3a1b4878d4e16c92aa48da597dde4b27ed2aea9d1c6446039a5acdb59e1db +size 4831938528 diff --git a/model-00007-of-00051.safetensors b/model-00007-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8311aee3da57dd671d1effaf9168dedcf1b59ae --- /dev/null +++ b/model-00007-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cc7e18bc4e625f62c0d9e9a1fca7bba103de528f7cc184404fd96210c4fe456 +size 4882269832 diff --git a/model-00008-of-00051.safetensors b/model-00008-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..067f35e6a319d4c3f07bc75b603e5e24461c1ff5 --- /dev/null +++ b/model-00008-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83e34d86e7b58c24a7b9351b13adb80bbe286947d62976518953605377ed7617 +size 4831889152 diff --git a/model-00009-of-00051.safetensors b/model-00009-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b732c0f8873a6e6db24ef3db65121faf78771246 --- /dev/null +++ b/model-00009-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67241060f6335e1e24c6f87be5e53d52b8224737c232d43f2e29a264f3be9401 +size 4831938536 diff --git a/model-00010-of-00051.safetensors b/model-00010-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de333a59672c5b0c961a26968f2199292d81f5c9 --- /dev/null +++ b/model-00010-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51049a69c30bdf30f5a975baccddbd5e22a40a9785be2364811d7eba129d691 +size 4831938544 diff --git a/model-00011-of-00051.safetensors b/model-00011-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e317469960b4aaaf86027f15ba663da2704017a0 --- /dev/null +++ b/model-00011-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:833381f9ff97dc249a7c1c01f68d53868424fd85b8bfba9e0d23b28b65f38489 +size 4882269832 diff --git a/model-00012-of-00051.safetensors b/model-00012-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0b7795c650801dd03bc81e33ed2f8a4bcd01992 --- /dev/null +++ b/model-00012-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f45e7e53cc5d71a8ec7d2c67ac70aaa803614df9021ab971d9968f25e505814 +size 4831889152 diff --git a/model-00013-of-00051.safetensors b/model-00013-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d4114268f1102a789e64026837b1446a38eb04f --- /dev/null +++ b/model-00013-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4eca6a5d127d3e4eabd862365f927ab1806b4736995dcbb27ec0b1af6523de2 +size 4831938536 diff --git a/model-00014-of-00051.safetensors b/model-00014-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9805bdc56940a6ecbc05422e46d634b8199c1a3d --- /dev/null +++ b/model-00014-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb07267f4254a128a45cd866b9c4480dc9ce64100b3cfb7d9bec9d6b4a479805 +size 4831938544 diff --git a/model-00015-of-00051.safetensors b/model-00015-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bd85b98ec6bbf496af689924569a23dee28778c7 --- /dev/null +++ b/model-00015-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f48fa5704808ab3f72a29429c5d8e373ffe69886757c2ffcb7c4c4cc5e843fc6 +size 4882269832 diff --git a/model-00016-of-00051.safetensors b/model-00016-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e577417d24c3b3d9051c26a99909c7369bc0613 --- /dev/null +++ b/model-00016-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262bff060921b35b0820ee5d572b760ff7e6f23bfd8702dbe70715d0da708294 +size 4831889152 diff --git a/model-00017-of-00051.safetensors b/model-00017-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b532be946d12856ad6e3e9e1329061f744a3fd5d --- /dev/null +++ b/model-00017-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3968093e7ff9b5c90a4a5fb104395031e686e81dfdd04ae58cfe62f1e04383 +size 4831938536 diff --git a/model-00018-of-00051.safetensors b/model-00018-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e8f257421178e09ccf254879de5c58498692f4a --- /dev/null +++ b/model-00018-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0359bc75d11f639cadab251dac3be46b92304685523d08fd4e4f806a4e5cdbf +size 4831938544 diff --git a/model-00019-of-00051.safetensors b/model-00019-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21bcc54966aaec15886f1df6b817e47d8485dfbb --- /dev/null +++ b/model-00019-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c761559b345adc8940976b1ce243904e8803e72278c4e42f7733e980166bd95 +size 4882269832 diff --git a/model-00020-of-00051.safetensors b/model-00020-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..716d47fa17aa918203a521e69b68fd43b65b5b11 --- /dev/null +++ b/model-00020-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137bc0cae6e332f7b8dd567bfca84f5f48a0be60e38d41f5f307295ccbdcea23 +size 4831889152 diff --git a/model-00021-of-00051.safetensors b/model-00021-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6459a13d5dfb022bae9898bba1d36e6840723f2a --- /dev/null +++ b/model-00021-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7731420f2e70b9ec7f50d99209485064e708becf4b579b975562ec0d7a622b6 +size 4831938536 diff --git a/model-00022-of-00051.safetensors b/model-00022-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbf6f2bf9cb471e210cb9eff292f07f8979773ae --- /dev/null +++ b/model-00022-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635d93495d23db26ee096c6744ebd6ec3845073ff73556245b5f511e7ce182f7 +size 4831938544 diff --git a/model-00023-of-00051.safetensors b/model-00023-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15a6ffe5efa2a25fc9f51719e04d9056cc26f49e --- /dev/null +++ b/model-00023-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6242395f202b98dcf111c9e9e5e0708d8b353e1ad247af58215619643a8f7a83 +size 4882269832 diff --git a/model-00024-of-00051.safetensors b/model-00024-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f39b4fe7f74db02793b14e76a6881c7d5ebf8196 --- /dev/null +++ b/model-00024-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c26c3f57936a687c9e8a8fbfb20148e327ed25f911c908d9045cc93faabf8f +size 4831889152 diff --git a/model-00025-of-00051.safetensors b/model-00025-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98b417ba06387ca981fc5f0a0f99c6914c7b2610 --- /dev/null +++ b/model-00025-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d705e4f0e5f2e4e2a603afd171d439e5b730198447724dafc483b7897ed0a86f +size 4831938536 diff --git a/model-00026-of-00051.safetensors b/model-00026-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe43cbe856433d041c7c4d5b3c74648b2c10b4a3 --- /dev/null +++ b/model-00026-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0918e7c18635aa9f85f1cc6eac17ed9ea3a90a9dd27c12539e31ce84bc2914e6 +size 4831938544 diff --git a/model-00027-of-00051.safetensors b/model-00027-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..52f6d116dce1404bac4a0748e4ec7956fbd4b4d8 --- /dev/null +++ b/model-00027-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de469052d9c0c65adfec44acbc1ade859aab849b3d95e6b7ca5d2af04786669e +size 4882269832 diff --git a/model-00028-of-00051.safetensors b/model-00028-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8252e990bdf776b027abe3be85f342a20363039 --- /dev/null +++ b/model-00028-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3783314d52062984ce6b845ca7df63e8ba8153e3bba41c1bf8356ee9351cc65e +size 4831889152 diff --git a/model-00029-of-00051.safetensors b/model-00029-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68a42711047d990009ade51d25b3322537d30e2e --- /dev/null +++ b/model-00029-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c28a6cbc7aeb4df59f2d49effd9042afacb6a5d3968b2669723033872e024ad +size 4831938536 diff --git a/model-00030-of-00051.safetensors b/model-00030-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e32e49e6add5a299408e3a71dec00480bf544b03 --- /dev/null +++ b/model-00030-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1289c730c0e003bc3e0543cf3d12c22564bcfa8f81b1af7440ac4b4c454915 +size 4831938544 diff --git a/model-00031-of-00051.safetensors b/model-00031-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c41542bdd958eed49ff6f705a97e2dc079fe2bd1 --- /dev/null +++ b/model-00031-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1523cce30578507cb8266482bfac41a7191802d96ba52b84041a89f9527a59 +size 4882269832 diff --git a/model-00032-of-00051.safetensors b/model-00032-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..96248b03ac63a11afa29e2828577f38c2c2effec --- /dev/null +++ b/model-00032-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b01a09f54016cfa396b86aa0a7266f7fa350fabfadba90e191adc8ccbdd3f8 +size 4831889152 diff --git a/model-00033-of-00051.safetensors b/model-00033-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1238ec90486ab319336bac2f1dc3c4808d04095 --- /dev/null +++ b/model-00033-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d7e835ecf417cf0469df167129c67bb61c4075e698b88fd02d980549313f7e +size 4831938536 diff --git a/model-00034-of-00051.safetensors b/model-00034-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..82c5169f32141334dfdd74e3612815d6fd4bdd5d --- /dev/null +++ b/model-00034-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f801b009a803205f74caf90b0ad49a35266cfeb35987b5896678d9582d3e9925 +size 4831938544 diff --git a/model-00035-of-00051.safetensors b/model-00035-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26e2d1fa3d7f70adefe4092f62e1dc84e48f0273 --- /dev/null +++ b/model-00035-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5795b4a3de96dff1b35211f9e4547d0ad6ede6c697199b187c5d0b5f2549bb9c +size 4882269832 diff --git a/model-00036-of-00051.safetensors b/model-00036-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f28b1ac83136ac8da76a4a0661e7a028d61092b6 --- /dev/null +++ b/model-00036-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8871aebd98eddd4fb787298a76924914ea0e584988a2396cb1238a156b2a2125 +size 4831889152 diff --git a/model-00037-of-00051.safetensors b/model-00037-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..733a43b8a5dd360d4d0d450d21e5665532205b4d --- /dev/null +++ b/model-00037-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:971ee95b65146c19041583487d42c13dc1b89a6a2b77d423d80b55b9354cf8e6 +size 4831938536 diff --git a/model-00038-of-00051.safetensors b/model-00038-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d413e1a30358ee7b7dda19f3279fa8700ba92ee6 --- /dev/null +++ b/model-00038-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0eb3aee633f3d54fd999981538842369116d2f71e1d742193ba31fa6bca6b17 +size 4831938544 diff --git a/model-00039-of-00051.safetensors b/model-00039-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..361de8fbc8dcac68c8aeb4d9ab397f3758c620fe --- /dev/null +++ b/model-00039-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47872da5954585033e998812103ced9e5eeca01f23c91efb4e5fd8a4ee5f59b4 +size 4882269832 diff --git a/model-00040-of-00051.safetensors b/model-00040-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25824ed75ec0cf2d1befa6f169170a75a3fed891 --- /dev/null +++ b/model-00040-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6563706e47199a9c9ec40db5a56cfe743157ed213e07b88da380388e4ce69fdc +size 4831889152 diff --git a/model-00041-of-00051.safetensors b/model-00041-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a88512cd16e3227b5fb48453d7ff8bc272af1590 --- /dev/null +++ b/model-00041-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eeb654ce5697cde9871ffd91f224a2f0a12a63f607082f18c627385caf039900 +size 4831938536 diff --git a/model-00042-of-00051.safetensors b/model-00042-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f66066a49d06217e5f933d42d9b7ca2ee0887bb0 --- /dev/null +++ b/model-00042-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01c6fa3d6bf0386d119d2dd2f56325a5c89d96f8c19b06ba5c62d0a744a649cc +size 4831938544 diff --git a/model-00043-of-00051.safetensors b/model-00043-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afbe0758113b0db5bf11ba26f7b0a09a6cf8f3c7 --- /dev/null +++ b/model-00043-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd9feeda42a2f2da07ce85665ebe3ab7a21c332a5889a6622bfbd2ed2b299f9 +size 4882269832 diff --git a/model-00044-of-00051.safetensors b/model-00044-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c61419675754e060cadfcfb0b0b6809c7e24041 --- /dev/null +++ b/model-00044-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ebd4af306b542d4f99fd197fdd60a8480e9b369ee00870d86025408b2ca89a +size 4831889152 diff --git a/model-00045-of-00051.safetensors b/model-00045-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97db684448e75be62faf4f8a65d97113304ed072 --- /dev/null +++ b/model-00045-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196d71184c449d4af54f64ffc155300ad8ad0d11ea86da87ae65f330ee285784 +size 4831938536 diff --git a/model-00046-of-00051.safetensors b/model-00046-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7286eec2b11944ddebb6b26594984ed6d6476443 --- /dev/null +++ b/model-00046-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b2f34d5a70c5867bb0a808b5b70ca3433272c3a5cfb9053214e16ce1763004 +size 4831938544 diff --git a/model-00047-of-00051.safetensors b/model-00047-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f1ca3969fc0e890ca74ec8c33f64ad2896e3125 --- /dev/null +++ b/model-00047-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8898b6e0ce61182d0da389e42a391b5ce0633eea1ff3f5985220a3576eea0f98 +size 4882269832 diff --git a/model-00048-of-00051.safetensors b/model-00048-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb7333c69bd7b8eb4e3249828cccfe3aadddda15 --- /dev/null +++ b/model-00048-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62959d69a67d6f6888263af19379a5abe9ac3b68a711117695974c5ef716e83e +size 4831889152 diff --git a/model-00049-of-00051.safetensors b/model-00049-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81dcc473a835074b014dc6789d8ec04af7311ffe --- /dev/null +++ b/model-00049-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e955f2acd78a1a51d8a873e0d100a9adcd025aaeffb03d811502e0e074cd063 +size 4831938536 diff --git a/model-00050-of-00051.safetensors b/model-00050-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9af4907d26327b3fa5dddae0ef3895d788856ac1 --- /dev/null +++ b/model-00050-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cb21b522d47809024c4c8044111421f8c88086da284d177294e8f2c3a233423 +size 4831938544 diff --git a/model-00051-of-00051.safetensors b/model-00051-of-00051.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16066e9b2b88a562deda8d897ffd254e17723f4f --- /dev/null +++ b/model-00051-of-00051.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb602e9897cbde40a43abbd65230ef7326525271ca4a397c07e9e2fd3cb5daf +size 2919310112 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..43cb90c23f416f3bb649d50540706cf4384b2907 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,802 @@ +{ + "metadata": { + "total_size": 245220139008 + }, + "weight_map": { + "lm_head.weight": "model-00051-of-00051.safetensors", + "model.embed_tokens.weight": "model-00001-of-00051.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.input_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00051.safetensors", + "model.layers.10.input_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.11.input_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00007-of-00051.safetensors", + "model.layers.12.input_layernorm.weight": "model-00008-of-00051.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.input_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00008-of-00051.safetensors", + "model.layers.14.input_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.input_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00009-of-00051.safetensors", + "model.layers.16.input_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.input_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00010-of-00051.safetensors", + "model.layers.18.input_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00011-of-00051.safetensors", + "model.layers.19.input_layernorm.weight": "model-00012-of-00051.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.2.input_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.20.input_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00012-of-00051.safetensors", + "model.layers.21.input_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.input_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00013-of-00051.safetensors", + "model.layers.23.input_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.input_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00014-of-00051.safetensors", + "model.layers.25.input_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00015-of-00051.safetensors", + "model.layers.26.input_layernorm.weight": "model-00016-of-00051.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.input_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00016-of-00051.safetensors", + "model.layers.28.input_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.input_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00017-of-00051.safetensors", + "model.layers.3.input_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00002-of-00051.safetensors", + "model.layers.30.input_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.input_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00018-of-00051.safetensors", + "model.layers.32.input_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00019-of-00051.safetensors", + "model.layers.33.input_layernorm.weight": "model-00020-of-00051.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.input_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00020-of-00051.safetensors", + "model.layers.35.input_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.input_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00021-of-00051.safetensors", + "model.layers.37.input_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.input_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00022-of-00051.safetensors", + "model.layers.39.input_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00023-of-00051.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00003-of-00051.safetensors", + "model.layers.40.input_layernorm.weight": "model-00024-of-00051.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.input_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00024-of-00051.safetensors", + "model.layers.42.input_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.input_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00025-of-00051.safetensors", + "model.layers.44.input_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.input_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00026-of-00051.safetensors", + "model.layers.46.input_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00027-of-00051.safetensors", + "model.layers.47.input_layernorm.weight": "model-00028-of-00051.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.input_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00028-of-00051.safetensors", + "model.layers.49.input_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.5.input_layernorm.weight": "model-00004-of-00051.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.50.input_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00029-of-00051.safetensors", + "model.layers.51.input_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.input_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00030-of-00051.safetensors", + "model.layers.53.input_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00031-of-00051.safetensors", + "model.layers.54.input_layernorm.weight": "model-00032-of-00051.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.input_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00032-of-00051.safetensors", + "model.layers.56.input_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.input_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00033-of-00051.safetensors", + "model.layers.58.input_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.input_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00034-of-00051.safetensors", + "model.layers.6.input_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00004-of-00051.safetensors", + "model.layers.60.input_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.60.mlp.down_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00035-of-00051.safetensors", + "model.layers.61.input_layernorm.weight": "model-00036-of-00051.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.input_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00036-of-00051.safetensors", + "model.layers.63.input_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.input_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00037-of-00051.safetensors", + "model.layers.65.input_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.input_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00038-of-00051.safetensors", + "model.layers.67.input_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00039-of-00051.safetensors", + "model.layers.68.input_layernorm.weight": "model-00040-of-00051.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.input_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00040-of-00051.safetensors", + "model.layers.7.input_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.70.input_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.input_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00041-of-00051.safetensors", + "model.layers.72.input_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.mlp.gate_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.input_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00042-of-00051.safetensors", + "model.layers.74.input_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00043-of-00051.safetensors", + "model.layers.75.input_layernorm.weight": "model-00044-of-00051.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.input_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00044-of-00051.safetensors", + "model.layers.77.input_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.input_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00045-of-00051.safetensors", + "model.layers.79.input_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.8.input_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00005-of-00051.safetensors", + "model.layers.80.input_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.80.mlp.down_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.80.mlp.gate_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.80.mlp.up_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.80.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.80.self_attn.k_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.80.self_attn.o_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.80.self_attn.q_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.80.self_attn.v_proj.weight": "model-00046-of-00051.safetensors", + "model.layers.81.input_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.81.mlp.down_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.mlp.gate_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.mlp.up_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.post_attention_layernorm.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.k_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.o_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.q_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.81.self_attn.v_proj.weight": "model-00047-of-00051.safetensors", + "model.layers.82.input_layernorm.weight": "model-00048-of-00051.safetensors", + "model.layers.82.mlp.down_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.mlp.gate_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.mlp.up_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.post_attention_layernorm.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.k_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.o_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.q_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.82.self_attn.v_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.input_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.83.mlp.down_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.83.mlp.gate_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.mlp.up_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.83.self_attn.k_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.self_attn.o_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.self_attn.q_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.83.self_attn.v_proj.weight": "model-00048-of-00051.safetensors", + "model.layers.84.input_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.84.mlp.down_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.mlp.gate_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.mlp.up_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.post_attention_layernorm.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.k_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.o_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.q_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.84.self_attn.v_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.input_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.85.mlp.down_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.85.mlp.gate_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.mlp.up_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.85.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.85.self_attn.k_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.self_attn.o_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.self_attn.q_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.85.self_attn.v_proj.weight": "model-00049-of-00051.safetensors", + "model.layers.86.input_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.86.mlp.down_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.mlp.gate_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.mlp.up_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.post_attention_layernorm.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.k_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.o_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.q_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.86.self_attn.v_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.input_layernorm.weight": "model-00051-of-00051.safetensors", + "model.layers.87.mlp.down_proj.weight": "model-00051-of-00051.safetensors", + "model.layers.87.mlp.gate_proj.weight": "model-00051-of-00051.safetensors", + "model.layers.87.mlp.up_proj.weight": "model-00051-of-00051.safetensors", + "model.layers.87.post_attention_layernorm.weight": "model-00051-of-00051.safetensors", + "model.layers.87.self_attn.k_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.self_attn.o_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.self_attn.q_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.87.self_attn.v_proj.weight": "model-00050-of-00051.safetensors", + "model.layers.9.input_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00006-of-00051.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00006-of-00051.safetensors", + "model.norm.weight": "model-00051-of-00051.safetensors" + } +}