diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6a79ed4f762a20277d72938d07bef81c0c4d36f3 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 28672, + "max_position_embeddings": 8192, + "model_type": "mixtral", + "num_attention_heads": 64, + "num_experts_per_tok": 2, + "num_hidden_layers": 80, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 10000, + "router_aux_loss_coef": 0.001, + "router_jitter_noise": 0.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.40.1", + "use_cache": true, + "vocab_size": 102400 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a0505d8393c7cf690949e7d46272ba7f6f1b491 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.40.1" +} diff --git a/model-00001-of-00193.safetensors b/model-00001-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81cef2a0e75314d474361f29e5d81d292551b99a --- /dev/null +++ b/model-00001-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee252ad16ab8c4435a2164d2b2bfe5cea977965f6f76dd23456d47b0c58f4ff4 +size 4798416360 diff --git a/model-00002-of-00193.safetensors b/model-00002-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be929b5728d450990d804cf24cf8d23477a58d5a --- /dev/null +++ b/model-00002-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3333c630d3f5ac9010294ef4e92bc38412a331c0db15bc6f70a23a06e5bf731 +size 4697621824 diff --git a/model-00003-of-00193.safetensors b/model-00003-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc04b74011d80476c3bb7cddc05d6935cb79d0ee --- /dev/null +++ b/model-00003-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df83e85801809a6942e3e5b59cef2aaa5375b16c891d61e1c4e98db8ffb8866f +size 4999776368 diff --git a/model-00004-of-00193.safetensors b/model-00004-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da0633ba2d901ef2b9e6f1230377da9ae805d8a3 --- /dev/null +++ b/model-00004-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:189a741b81ee3acf943d6c183f90cfffd8e5709dcfe495206268858103ed022c +size 4697621824 diff --git a/model-00005-of-00193.safetensors b/model-00005-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..478a6513eaa04d36810be8a0914cac7a05782f2c --- /dev/null +++ b/model-00005-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ae663921ef1122966cdf9aee71a333aeb205b9d9ea8fd13a4be9398bc1d7619 +size 4697621824 diff --git a/model-00006-of-00193.safetensors b/model-00006-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e883534dd8d03898f61089690ce16484e4ef38ff --- /dev/null +++ b/model-00006-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe163192cf2d8f2a361e3d8bbadbfd5f718192a43b44db589d191814ec587bd +size 4999776360 diff --git a/model-00007-of-00193.safetensors b/model-00007-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a41698c2324afa9eb8403bd714daf44a61dc25e --- /dev/null +++ b/model-00007-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5caa449b493e719280cbb958fea8235e2c878df9cd08649fc6cc593e1abc78 +size 4697621824 diff --git a/model-00008-of-00193.safetensors b/model-00008-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..237875ca978ef431b3b1ce7bd949326e030c4b06 --- /dev/null +++ b/model-00008-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4cf92ee935581fe6cf30619f6eda144219e40800d1377c9abf44c08d3a59c5b +size 4999776368 diff --git a/model-00009-of-00193.safetensors b/model-00009-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57dc9884a1c04847ed3807e680d3d1b388d3bee8 --- /dev/null +++ b/model-00009-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc72e39d1fbd62196cc578319d6aa84ce38f67530900ce26732db5ef86cad86b +size 4697621824 diff --git a/model-00010-of-00193.safetensors b/model-00010-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..525594584becfa6325f18b8287e6e77b6994defb --- /dev/null +++ b/model-00010-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68bd35227d019a3223a9b793b1a9d731b463034de068aa31f7e4f7313bbb11b +size 4999776368 diff --git a/model-00011-of-00193.safetensors b/model-00011-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1738e748817bb3d47910e1367a6f030134c27c9b --- /dev/null +++ b/model-00011-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a022b272f9d49183fe6d795ad50811dc401e29a79f0a74f22bb6d5df2ea8ef0 +size 4697621824 diff --git a/model-00012-of-00193.safetensors b/model-00012-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..059a8e26bc7ccad2e5bd92d5a50fddd9fafc5f7f --- /dev/null +++ b/model-00012-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2dbcb9712a8df369fa3c813dd1054bae9011569a029ae80fe1083c35933a59 +size 4697621824 diff --git a/model-00013-of-00193.safetensors b/model-00013-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24271b451633c04c77bac1d7db53154231cb5051 --- /dev/null +++ b/model-00013-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c7b350c3263c954ef4ec7e0070441a36637463153135cab60f80f4f7872f741 +size 4999776368 diff --git a/model-00014-of-00193.safetensors b/model-00014-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..135d33d3f1b74e2c5978761bb8b3a8c73bd343ce --- /dev/null +++ b/model-00014-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8deee86a710b0cfd5e7c67324f18595e91909dad542d4d14db30ce2101e5b081 +size 4697621824 diff --git a/model-00015-of-00193.safetensors b/model-00015-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc47ed178a3e9db2fbe884078cfa8d81340f59e0 --- /dev/null +++ b/model-00015-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e7d0278748ed7940a7b14dd36d3a80ad19c35785f871e50183c7733a948a724 +size 4999776368 diff --git a/model-00016-of-00193.safetensors b/model-00016-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcf90615a6663196ff46f69f81ae73bc1f9fd4c5 --- /dev/null +++ b/model-00016-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07976079b71f53a3a3eaacfa1e240d7d3a8aabf9754276ea79417bdea7817cee +size 4697621824 diff --git a/model-00017-of-00193.safetensors b/model-00017-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb95a0b07a2cbbf36ed57c09f87233939c171096 --- /dev/null +++ b/model-00017-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93302b80a1d80c14ae25cb5f27bece312611b3bf9809ab8a24bdaecbde644b91 +size 4697621824 diff --git a/model-00018-of-00193.safetensors b/model-00018-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..99ef072a3709b1b14eb67dde030e627885be4405 --- /dev/null +++ b/model-00018-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0c919af75445055948abb1a445581ed43f1e2afe90f36d4fdef6c448ee107d +size 4999776360 diff --git a/model-00019-of-00193.safetensors b/model-00019-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b8498d23cbace7321de84060faa734321f1a47a --- /dev/null +++ b/model-00019-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbafdd88bcd8282c56994af921a80894b0bb487aebc46cff149bcb23e47b4285 +size 4697621824 diff --git a/model-00020-of-00193.safetensors b/model-00020-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b02c883644373cb94d80322272053a38f70a51d --- /dev/null +++ b/model-00020-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596c0ffd061a09655ae3eba5f16f65310e803c0880a07e522965d6d99421f6a6 +size 4999776368 diff --git a/model-00021-of-00193.safetensors b/model-00021-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9556c0cb495a8df498868016715bb28ba23e972f --- /dev/null +++ b/model-00021-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bacbf6e443f07d1efb53df4638f5af90304175bac1f302479bf021d428e81148 +size 4697621824 diff --git a/model-00022-of-00193.safetensors b/model-00022-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4852f14912844e86d602ecd9f86641cb19e14cc --- /dev/null +++ b/model-00022-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a9395833ed6359da4aeed7cd08ed90689e9fc3300fc402f75ed2ec1cda0593 +size 4999776368 diff --git a/model-00023-of-00193.safetensors b/model-00023-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f93ebf6658aa365208581d666085dd2c7bf739ba --- /dev/null +++ b/model-00023-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d90f2616c0f99d6a56ccbdad6a7630b7e91109f720b53415580c5de384a0e736 +size 4697621824 diff --git a/model-00024-of-00193.safetensors b/model-00024-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8a89892c775475331932fab8b4381c23b6072d02 --- /dev/null +++ b/model-00024-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39870099876fd74ec68602ea661ce39f66e9041885258ea11be112966fb7e26 +size 4697621824 diff --git a/model-00025-of-00193.safetensors b/model-00025-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0561c798d9f8a6f0ad0a430df9e40642deb380e --- /dev/null +++ b/model-00025-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e5836e49e659131cdf87fee401fbe51cfff97b71d286b742c7e1fbceff55eb0 +size 4999776376 diff --git a/model-00026-of-00193.safetensors b/model-00026-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d836472c272df4a92514a6417c58aac986090a9e --- /dev/null +++ b/model-00026-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c507aa31e9b6ee798c09e947f3f5089eb571a6b605e5a39aace12351fda4c9d3 +size 4697621832 diff --git a/model-00027-of-00193.safetensors b/model-00027-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d294deb8668765baa5ec448fb6161710f7b2c73 --- /dev/null +++ b/model-00027-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b4446513879f1b726c9559010aa6058b817ace54d9eb46e5de0facef2626581 +size 4999776384 diff --git a/model-00028-of-00193.safetensors b/model-00028-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..be340eefc3edfb609b763875a5462257bd1d44a4 --- /dev/null +++ b/model-00028-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1a71f2d2bd15cff751bb66a3c8d27749f3704e40b2208474da2176006227bf +size 4697621832 diff --git a/model-00029-of-00193.safetensors b/model-00029-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7e9f6261b265383d83bfe4dc6e6e2b07c9bc0e77 --- /dev/null +++ b/model-00029-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b4abab2cdce9bb38eb89ff80e795911f4f71813bb464df85c8209325ff1c46d +size 4697621832 diff --git a/model-00030-of-00193.safetensors b/model-00030-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c37994f6bc4cf5fbc93ab6de68174313b40167f --- /dev/null +++ b/model-00030-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29db0d27978a265bc80e07b6bd4f2efb4f8c01461c60d99f8381067a8dd50909 +size 4999776376 diff --git a/model-00031-of-00193.safetensors b/model-00031-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a4dc4495a798036181c455707817fa6a34091b53 --- /dev/null +++ b/model-00031-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02462b1df37884f2986c603c332361121d372119ce839c6098990cab53f15eb5 +size 4697621832 diff --git a/model-00032-of-00193.safetensors b/model-00032-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..044453d1e615a77ac8625b15ddcefed5bd439bcc --- /dev/null +++ b/model-00032-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c4eeedb92e550923996d1ebb1b46b588bf4be945e4cf9497bc523d66bde4b4f +size 4999776384 diff --git a/model-00033-of-00193.safetensors b/model-00033-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e179b1ca77ec53abb62acb74c77dabe9b39504e --- /dev/null +++ b/model-00033-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a99d2be426eebfef1b4678a4377176ac842cba6ef081d3748634b8600db8f67 +size 4697621832 diff --git a/model-00034-of-00193.safetensors b/model-00034-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b4b77ec1f501f3873dbce69250a7d3d2abf29814 --- /dev/null +++ b/model-00034-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa7bce8e7c07c333f493acc7e1b33b50f9cf844ef068214c9a6e6825248fec0e +size 4999776384 diff --git a/model-00035-of-00193.safetensors b/model-00035-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3d33cc65b57f6c3edf2b87d9c6918c4fe010b5d --- /dev/null +++ b/model-00035-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd008924df9e8454526eb8182db193823f5ab9bcede55cecc6edc5a0bbf3d4cc +size 4697621832 diff --git a/model-00036-of-00193.safetensors b/model-00036-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..996b13f39aad780151fcbef344112a8a101648a1 --- /dev/null +++ b/model-00036-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85cfc4e8abe8f331fffd9a14f36fe7ad947745478d246740c689a237f7612eaa +size 4697621832 diff --git a/model-00037-of-00193.safetensors b/model-00037-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a458ed77c4c609bfc4fbbe945bf23eead59773af --- /dev/null +++ b/model-00037-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:081ad372bc9087d33be2e944e567534ee6cb1f08c25d263ff7a53c1b7ca1825e +size 4999776384 diff --git a/model-00038-of-00193.safetensors b/model-00038-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2ae0c4a8e21ed667b91b3ed92baf37d75f9c09d2 --- /dev/null +++ b/model-00038-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5ad5f72e6aebbdd82800a207516f877027991181baa9fc2c475b5564db5976 +size 4697621832 diff --git a/model-00039-of-00193.safetensors b/model-00039-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec77d1cf723927c12f2e9d8d92432704765aecca --- /dev/null +++ b/model-00039-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92da26032ce48bad788f02fe0e9068f877b0f1a221da2bd5c7ff11beb36cf19b +size 4999776384 diff --git a/model-00040-of-00193.safetensors b/model-00040-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d07be6d281eea3138a0f5580caff0bc9c643b82c --- /dev/null +++ b/model-00040-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79012a6a0c9c5c354abce055b8b6de17d03fa37d75f051f897595c5f87312dc +size 4697621832 diff --git a/model-00041-of-00193.safetensors b/model-00041-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34853ce1a02308424f03354ad4e78f7431961eb1 --- /dev/null +++ b/model-00041-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:992e9afc1d8de96fb2707984a3a71989c1db8c7d99cf8f104581604abf802d1d +size 4697621832 diff --git a/model-00042-of-00193.safetensors b/model-00042-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8818c57a88aadece9812715f33dd1f101a80c1dd --- /dev/null +++ b/model-00042-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff6acab943a6495eed7d5dc05185ae861d61c3b9480d9ea69152da1d0f9ea40c +size 4999776376 diff --git a/model-00043-of-00193.safetensors b/model-00043-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..832e56d1f3673bfe1c458265c639c6f5de17b49e --- /dev/null +++ b/model-00043-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1783274dc9898c35fcd648e89ed451a9b7cd748248763b182bbe5135e1e3f075 +size 4697621832 diff --git a/model-00044-of-00193.safetensors b/model-00044-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc30e47e2f544025a85681b9d694a4e88020057a --- /dev/null +++ b/model-00044-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f958646f8a171661301c52e669585ffc9b51b965f9e632395180ff91a53684c +size 4999776384 diff --git a/model-00045-of-00193.safetensors b/model-00045-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f8036648344054e612dc0ed874c58bae1cc3aad --- /dev/null +++ b/model-00045-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb928574f7cc6e8917af69fdcd87c34dab1a5821764876c40e906e1c61de4121 +size 4697621832 diff --git a/model-00046-of-00193.safetensors b/model-00046-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00bece69cf76d7bd6392c62dbf45044a00307117 --- /dev/null +++ b/model-00046-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:547ee660c6f6370fce2381d7269d68885bd6907002c33984bf6a994f47fcd33d +size 4999776384 diff --git a/model-00047-of-00193.safetensors b/model-00047-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..446c5b5289868ed49e1f1319e235c17a9aac0d54 --- /dev/null +++ b/model-00047-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0cf7b06048654d0957d473678744765b4bc4845687565b2a080e21259582238 +size 4697621832 diff --git a/model-00048-of-00193.safetensors b/model-00048-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..591c8ad7c922fb5f1da50f5a3cdf8cf74ef8ca19 --- /dev/null +++ b/model-00048-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6277ea1fa27e3af9ba567f9e7a18300cdb685ec1ae90c09b505662fc3b8f1cf2 +size 4697621832 diff --git a/model-00049-of-00193.safetensors b/model-00049-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ba3070c26ee84a0d526cc87ab6493b2cd99e8ef --- /dev/null +++ b/model-00049-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c4bc9de1ef11f92db4f2e1fb88813b85da6f44694e3ee0a3597061e01c8f62 +size 4999776384 diff --git a/model-00050-of-00193.safetensors b/model-00050-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0bac7ca4ca119402c1d05645e104ceb9b6d79a2f --- /dev/null +++ b/model-00050-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:657e0aa84f7db4167d41822eb25b4722121e9bdaf421e0694453ba4c41de70ee +size 4697621832 diff --git a/model-00051-of-00193.safetensors b/model-00051-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1c1cde92c43e3530395c55999a6057b45b1edd0c --- /dev/null +++ b/model-00051-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5ebe177acc5eccefefef7065c170a9c37e83c8382ee0f6c7ccbcd104a930532 +size 4999776384 diff --git a/model-00052-of-00193.safetensors b/model-00052-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ea2d844ebc65265f0df148454f782ea0c74adbb --- /dev/null +++ b/model-00052-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc598cf279567ed9cf7feb1bd5d2cb52f52c6ecb58a49746932fdcf44ae2b7b3 +size 4697621832 diff --git a/model-00053-of-00193.safetensors b/model-00053-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bce4f42bbc6d907b36cf43564672db1461ce142e --- /dev/null +++ b/model-00053-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3988a3c17d7ba96b25dd9a6dec7660713736585f3c16c8b8b9424695c74651ea +size 4697621832 diff --git a/model-00054-of-00193.safetensors b/model-00054-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..286c233597eaef6adb150d02fcef2417eae936e8 --- /dev/null +++ b/model-00054-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad76982f5745a44cb9c455e03dd21ba1f286f6ea8a1fc836f59b6255c2ac2497 +size 4999776376 diff --git a/model-00055-of-00193.safetensors b/model-00055-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcc2466ed3cfbae5e714d7d27f5043075b1f98d9 --- /dev/null +++ b/model-00055-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:163f9e4332d828c8045450ad8181eac967b8d38df0b39ea2d75c06e4ec9f08bd +size 4697621832 diff --git a/model-00056-of-00193.safetensors b/model-00056-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f8017bc34078d32a57f0981659ac62c3758fc1e0 --- /dev/null +++ b/model-00056-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df4821e30a6784c8bf9bc73ed070625fe055396eb9df78aa61621e72da9e4a4b +size 4999776384 diff --git a/model-00057-of-00193.safetensors b/model-00057-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4567e1f03b2c646a19c5cbb5feac98612828bcf --- /dev/null +++ b/model-00057-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6965e7d8030d3de060037bdc8adbeac315dea541119f2659e61610fc8311f1e9 +size 4697621832 diff --git a/model-00058-of-00193.safetensors b/model-00058-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e700edc30fa2f6f627fa9e027365cb671208e1b8 --- /dev/null +++ b/model-00058-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170f53b4a8d18d1dd9ba2230ce72d37beed5ce0c24bca37b9258a994e68ddb9e +size 4999776384 diff --git a/model-00059-of-00193.safetensors b/model-00059-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..285d43ce9c9a6f4a1abe371736da5bf21e36caba --- /dev/null +++ b/model-00059-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b7826842de6a2d7d1dca0a90c8e1bb96ae4b9d1bca8c632685e08583a13b94 +size 4697621832 diff --git a/model-00060-of-00193.safetensors b/model-00060-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c46d7850021c77d5da0265ad512e9a98ce41426b --- /dev/null +++ b/model-00060-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9e94294298f048080ca9c8f5059331f5d7771c5a3d9aa1137677d544fbf4a4 +size 4697621832 diff --git a/model-00061-of-00193.safetensors b/model-00061-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..972395da4894fd5b0a5abe3d12f01e67c79ec3c6 --- /dev/null +++ b/model-00061-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9b02ede041b1076b795ad7f4a91e72eaa569072d29dfa48bfd0b04f388a588d +size 4999776384 diff --git a/model-00062-of-00193.safetensors b/model-00062-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4eb5c316357085cea5142af1cc865e62e466601f --- /dev/null +++ b/model-00062-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0412fc0fec19d3bf520513b2404a7a0fdd0a58d03e861d9553ff4a8e2b562a2c +size 4697621832 diff --git a/model-00063-of-00193.safetensors b/model-00063-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..213f158dce1f61ab10ca2676267893f2a83cff6e --- /dev/null +++ b/model-00063-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c98f00dabc9e20f48070eb556725f771b79d1e2b4fa526035dfea84cdd62f296 +size 4999776384 diff --git a/model-00064-of-00193.safetensors b/model-00064-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f996d96076fc06a0896c74ced37cbb6106ce073 --- /dev/null +++ b/model-00064-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af919763df7d570ca4f21b7f3eb35cb82dc88b05cff1a08fab1f78fcfd4ab478 +size 4697621832 diff --git a/model-00065-of-00193.safetensors b/model-00065-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92f7a13600be0362deec058b59a25c3e055b05a6 --- /dev/null +++ b/model-00065-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e659e1ad1ef371237c0ce046773abfb9d3e5a4d89132b7b628ffcec570a952e +size 4697621832 diff --git a/model-00066-of-00193.safetensors b/model-00066-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37ed23a811b99ccc6db0cecd6c3ee5ce2f930e2e --- /dev/null +++ b/model-00066-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3b7c581ca71cd37540537195c8b360286b0a48dbe9cac522ae8921ca9d6ae8b +size 4999776376 diff --git a/model-00067-of-00193.safetensors b/model-00067-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a270c3bda72925eaedf744c535ca5416dcac378b --- /dev/null +++ b/model-00067-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49660e4522a5c399ff73d4ca511ca6f59757028867baad24e1f239a35e39ec1 +size 4697621832 diff --git a/model-00068-of-00193.safetensors b/model-00068-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b848ef9a8d41177fd75bd5e46d03d09a7d863f1 --- /dev/null +++ b/model-00068-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad61ae1e69e49dbd155b07bd490eaa34a5f165d0f4c1f5153b8635a56dfbc93e +size 4999776384 diff --git a/model-00069-of-00193.safetensors b/model-00069-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0a295873e34b744a399f6901bb4fb59b18b63c0a --- /dev/null +++ b/model-00069-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2db9bf9ba4bcebd6ff3c5f9e3a6a2dc59bb7d1cf16838be5963bc20876649c +size 4697621832 diff --git a/model-00070-of-00193.safetensors b/model-00070-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3ef26b8d3a039c026314eb3c15f18f4c3e7a8fa --- /dev/null +++ b/model-00070-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e9476a25a2af266a07c8c9a471bd6caf45beb9b211a07361716aef53d3b56e +size 4999776384 diff --git a/model-00071-of-00193.safetensors b/model-00071-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed19ecd0a33ecbf9eed789b2b3823eace270621a --- /dev/null +++ b/model-00071-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4734cbde2d5778714cafd9845a73db88738c79b801a9d5e4da408f93d38ebbd +size 4697621832 diff --git a/model-00072-of-00193.safetensors b/model-00072-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4c390b3e6eba3e21f4c02622843b14c37814ed01 --- /dev/null +++ b/model-00072-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d33ee321f8e2a2655b8977463233cfd708fe74ec94804c970cfb7509aa75c94 +size 4697621832 diff --git a/model-00073-of-00193.safetensors b/model-00073-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..097bc8815e3fe1fe5b5a3e064115660e27256aac --- /dev/null +++ b/model-00073-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf09f145cd0a016bba13bce5707f59c67e6895d06e45fc1016c2fc96ac3750d +size 4999776384 diff --git a/model-00074-of-00193.safetensors b/model-00074-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49014c9a13111339c597c6039a71be3a1e746183 --- /dev/null +++ b/model-00074-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0270524dac343c41c14f5e3f4b87a4a9ab1eb56958075b360eb47e62d4ec21b +size 4697621832 diff --git a/model-00075-of-00193.safetensors b/model-00075-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c62544d735d00fb1aa05e6396ae0c339be52ade --- /dev/null +++ b/model-00075-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c8d078ea3ef7b17c26495ac4999d3fd1410aab1ca72bc19de9044d77066354d +size 4999776384 diff --git a/model-00076-of-00193.safetensors b/model-00076-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fffa6931f547b6f9bc3803d094bb5269519c9c89 --- /dev/null +++ b/model-00076-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6abda95d1783b0d203f79245b070aecb024b22be9c49cf03f1351f701bc399 +size 4697621832 diff --git a/model-00077-of-00193.safetensors b/model-00077-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ca140cf4c104c14abe1b4a390ac832cb379f660 --- /dev/null +++ b/model-00077-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41b6486e7c1ee510997a7a2265767a9bf7c817cd94bfaee8b92c55cd4b116f1e +size 4697621832 diff --git a/model-00078-of-00193.safetensors b/model-00078-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14f1378ebdfd1ab30b26f72d8dfe9e05f0f841e4 --- /dev/null +++ b/model-00078-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5175d83710777be84ae0d338c7e63c2063471b3a203eeb6e0e636ed0775719f2 +size 4999776376 diff --git a/model-00079-of-00193.safetensors b/model-00079-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..150dcf29074ecfaae59b4f9b5df3b67d63cc2f92 --- /dev/null +++ b/model-00079-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05db07c3b6227757a514466c06ebd0ac6f15c1e2604f64869102c26bc8943160 +size 4697621832 diff --git a/model-00080-of-00193.safetensors b/model-00080-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0a780b47b3a2c3aea912f7bd50744263c02552f --- /dev/null +++ b/model-00080-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72c98577b9c573a5d54fbdc54c6e7553e351c2de552d5cf2b2895573eb33915a +size 4999776384 diff --git a/model-00081-of-00193.safetensors b/model-00081-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad31169d54a65efd2e95f856006bf3ca1422482e --- /dev/null +++ b/model-00081-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d5f523801b157da7589695767a5315119036f5d477c60c9ab2be6cc89fe70ff +size 4697621832 diff --git a/model-00082-of-00193.safetensors b/model-00082-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bf530187d30e2f6d2c5549eeed04b863cadefe3 --- /dev/null +++ b/model-00082-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d148581040e9fc880ec9315d050132581fe5fd05ad41204494ec2cffc31e0442 +size 4999776384 diff --git a/model-00083-of-00193.safetensors b/model-00083-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6973320623afeeddfa97418a75ddb4eeeeef962b --- /dev/null +++ b/model-00083-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be61f137a6ef414b4a733e6e5fd4052680ea6987119c084f5e7030e6674dd92 +size 4697621832 diff --git a/model-00084-of-00193.safetensors b/model-00084-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45b3e2c38c5a20d919a53b06c27ac58fc8541627 --- /dev/null +++ b/model-00084-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b23012fdf19e7a5dc8a1e90e1cc0cfe870509e29909c237a83c959996b9cb840 +size 4697621832 diff --git a/model-00085-of-00193.safetensors b/model-00085-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb8c2ca86411f458f30d67380d23af45267d8ec3 --- /dev/null +++ b/model-00085-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752440ca3fa8f34a65175953dce903cc54f657d76a97ae97b1aa7b4c03cfed1e +size 4999776384 diff --git a/model-00086-of-00193.safetensors b/model-00086-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..287f766b3190b5e9064a477862ade77f69c0c7c3 --- /dev/null +++ b/model-00086-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c07ca0bc5efdb4d09b8bdfb20627d948a4e361323838156833571939eb8ca21 +size 4697621832 diff --git a/model-00087-of-00193.safetensors b/model-00087-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b38cba0d1d59611b24721bb05d87c4c76a2ab2d1 --- /dev/null +++ b/model-00087-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0250d5313b6bb9c799f88a7e0a08ec98a2fba1a9f9475a5b616c7411e2a9138 +size 4999776384 diff --git a/model-00088-of-00193.safetensors b/model-00088-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..237309fcfcf0187edb72658238476106e5dadec8 --- /dev/null +++ b/model-00088-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2594f36b24b62549a6a3eb89bc9d0442ef6f837810b770c5638b4b4693421fb4 +size 4697621832 diff --git a/model-00089-of-00193.safetensors b/model-00089-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e59071cf6a5ab0b4e380e38f2ae5e2ea2a3c92f6 --- /dev/null +++ b/model-00089-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9247cb97499ffb358923dbb731d14745aa840a637a79e106abab24c184bc2ece +size 4697621832 diff --git a/model-00090-of-00193.safetensors b/model-00090-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bc7e802d568c4dc7b2ac495c34f360c78132a4c --- /dev/null +++ b/model-00090-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9d8045b15aae64e759d706cb5f3bbbe5dbd6ca1891a80dd32d2e5c8098a220 +size 4999776376 diff --git a/model-00091-of-00193.safetensors b/model-00091-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfbae1b5b18b0275414eadc6977ee1e8c69dee32 --- /dev/null +++ b/model-00091-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc77898fc03d4259846ce302329118f429e9e317db7148ad17929ea3b011dfb2 +size 4697621832 diff --git a/model-00092-of-00193.safetensors b/model-00092-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..81cb84e56afd75bcd57a79062f1fda959e72218d --- /dev/null +++ b/model-00092-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f0969c7d0c01ed055e0db8df7488e61f0f264953079763f5563f5ab48f4675 +size 4999776384 diff --git a/model-00093-of-00193.safetensors b/model-00093-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c8d83f3431b5dd23733433f5892fbffca3bd630 --- /dev/null +++ b/model-00093-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6f5fd902980d9512e596803e9f5d48381cf1cdcecb280fac55fbac08749ccc1 +size 4697621832 diff --git a/model-00094-of-00193.safetensors b/model-00094-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90ee6c303aabdc3123db2891ac32b63b4e0b46da --- /dev/null +++ b/model-00094-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1ebd8b5da6265b86675bf73e952f559844adcc6ca8270ba8a96ba39557d806 +size 4999776384 diff --git a/model-00095-of-00193.safetensors b/model-00095-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0b09c4be0fd3275064fffe5224060f96d62f506 --- /dev/null +++ b/model-00095-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b42db38327fb1de230626b4e9982f49b6e6bcb43dc6f2c57a468a13505b91b4 +size 4697621832 diff --git a/model-00096-of-00193.safetensors b/model-00096-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..61b1b66e8be86e87064129a363bd129f7ab7c455 --- /dev/null +++ b/model-00096-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26b93a9adb72f73c9a0a02bd623fad2b5eaa5779310e393c5d004a135574bb57 +size 4697621832 diff --git a/model-00097-of-00193.safetensors b/model-00097-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..637510f1ff7c0c8238383a76a2977f7e5bac3499 --- /dev/null +++ b/model-00097-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edc30bc5f846870a3baf7cc256f098670ed80283810347054900399955297308 +size 4999776384 diff --git a/model-00098-of-00193.safetensors b/model-00098-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48fd6f3b200cc4067d23f1db644dcc29842a5caa --- /dev/null +++ b/model-00098-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0aaf6ba4c285509011f2ab8c521166caf1cdb14dc12a2098ab2e8bb35f8973 +size 4697621832 diff --git a/model-00099-of-00193.safetensors b/model-00099-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..845af006aac42cb8651acb1972ab7173dde6c245 --- /dev/null +++ b/model-00099-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46f55931ade46bb5374eb824bd8e7604c4d28cf596c81190bf21a4abe8cf9af +size 4999776384 diff --git a/model-00100-of-00193.safetensors b/model-00100-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..338d800afa8391419b348c9c19dd0dab2be08dda --- /dev/null +++ b/model-00100-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c971cbe3e14fe922d00d54257f14b7bb40e910799b6897f561e8e5efcba201bf +size 4697621832 diff --git a/model-00101-of-00193.safetensors b/model-00101-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ba8c9ff7c44ceec7762b78302b7c7b1435976e6e --- /dev/null +++ b/model-00101-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c762570c44fca18dce2ff73f58f3a801b94e894831a82049491ee3d3a38409 +size 4697621832 diff --git a/model-00102-of-00193.safetensors b/model-00102-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa15f400ab11a4b37c550ced474df1252c05fba5 --- /dev/null +++ b/model-00102-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c5aee38b60568c2edc442c29c64df21cd49dca8829fc07dd6d5390538dea364 +size 4999776376 diff --git a/model-00103-of-00193.safetensors b/model-00103-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d34085ea154ac4c35162e9bcd5cbc21af2ecccfd --- /dev/null +++ b/model-00103-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97a2cf17388b52149e0a597802dada04150a6c64e63a20027141b19f53bc8003 +size 4697621832 diff --git a/model-00104-of-00193.safetensors b/model-00104-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49605193e9609749da7140f5a200ea7ac3f68e1f --- /dev/null +++ b/model-00104-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4d961d5feb7c81536865c7f43e466b7507a2dc00c3379edf3926630503e9475 +size 4999776384 diff --git a/model-00105-of-00193.safetensors b/model-00105-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ed1831da3cc23608e8d6c952ea295ee2ebf9cb9 --- /dev/null +++ b/model-00105-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e076743f44a25ff96710d840563c4086dbf73d270efd65c28e144b8002af77e6 +size 4697621832 diff --git a/model-00106-of-00193.safetensors b/model-00106-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70a1b187ab379b0a20f0db5f43a7410817a87a1c --- /dev/null +++ b/model-00106-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881827012871c670451912f7ec4e69ea2dda997f2c213656eb7eefbc06ed7a5d +size 4999776384 diff --git a/model-00107-of-00193.safetensors b/model-00107-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adab7183a5bb441ce55cc7714083f4fe7590e890 --- /dev/null +++ b/model-00107-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:887adb9e8183c8200f5033638aeb8999d1fa1fdeeed7344dcfcac08eabd952d7 +size 4697621832 diff --git a/model-00108-of-00193.safetensors b/model-00108-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0865c3f26ea2a86b828824cd044380555e0424fb --- /dev/null +++ b/model-00108-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119843f8f492c62f6c8b905b7e652fce019465c314e5c9c67c59d25d8b533755 +size 4697621832 diff --git a/model-00109-of-00193.safetensors b/model-00109-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2426d449ae42c970ac2205d97aa00cc41990b97 --- /dev/null +++ b/model-00109-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91991bc319a83d11519d7399728534cf9387748949f2e013e0aa12f6e3af8149 +size 4999776384 diff --git a/model-00110-of-00193.safetensors b/model-00110-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d23f3f4ef1d9bca35631adb32756c456477d51c4 --- /dev/null +++ b/model-00110-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd8d316270880d85f8400d725ec5299619ec544709756f9e32e907eef06f0c4a +size 4697621832 diff --git a/model-00111-of-00193.safetensors b/model-00111-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6313948f6d04756a66152ebf487b63288586334 --- /dev/null +++ b/model-00111-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d20920b9925da46a1c7eb5f8a43b78a45364c03bd9b79909d89dbd2e62ec6f +size 4999776384 diff --git a/model-00112-of-00193.safetensors b/model-00112-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad7a103bb365032d41a56f2e53e27adcceb93a7f --- /dev/null +++ b/model-00112-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69da9d5c24a23feb57531b14e5633ab6b47aa158397cdb2fc24404269a565f9d +size 4697621832 diff --git a/model-00113-of-00193.safetensors b/model-00113-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..db6cd50e298d5c1642edb5653e42ed255881a34d --- /dev/null +++ b/model-00113-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde909281f8a8556102e2fa91cc1c1c759259bdbfaae7832e58e75260d4f2f62 +size 4697621832 diff --git a/model-00114-of-00193.safetensors b/model-00114-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..763840e9b8e3326f0f22da676dd73426a69da9ca --- /dev/null +++ b/model-00114-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f866d46b52655082d0528d07c4de5ff2dc0d89c8ce909bee032fc0f5ee8165c +size 4999776376 diff --git a/model-00115-of-00193.safetensors b/model-00115-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..efdc1fa9445f8ce22b88a89a7797e2fb46e09d7d --- /dev/null +++ b/model-00115-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a6504a0528259fa4ba0c696da72125a040f59bf3d228e8741f71053a6847c92 +size 4697621832 diff --git a/model-00116-of-00193.safetensors b/model-00116-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2121addaeee61632d8a6958a53a9cf04faf5cff0 --- /dev/null +++ b/model-00116-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b038e1db995d5055634a548a193f41b74b34c49e08bb4d5725f412f48e407cf4 +size 4999776384 diff --git a/model-00117-of-00193.safetensors b/model-00117-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d7f3396a78bc9a8ac45ed8b6d85d988451e153d --- /dev/null +++ b/model-00117-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ceefcae314c8bceb4b0d56e5deb31dabaa079215ddbc648326ca2e81c8750b6 +size 4697621832 diff --git a/model-00118-of-00193.safetensors b/model-00118-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98bcd590c1133a6f493303df846f1181f96316d7 --- /dev/null +++ b/model-00118-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a1e7adedddd741dbcdab73d579b1a475474b5b0e53817453f7cf6bdc00e93d3 +size 4999776384 diff --git a/model-00119-of-00193.safetensors b/model-00119-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06898652bfd41bc3790e2e04a9b7cf131506e9f2 --- /dev/null +++ b/model-00119-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5047ad5b186ce3b4ff89a5b50e95661a4f9ac4cc6304f324fb4c68e10f299a29 +size 4697621832 diff --git a/model-00120-of-00193.safetensors b/model-00120-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbdc2ad3b5d6b6ecf1624f6680ccb1b0b43523d4 --- /dev/null +++ b/model-00120-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5928e46d02f4b0a0f37afdeddbd1f9745faebbfab16c6123b581699f47883849 +size 4697621832 diff --git a/model-00121-of-00193.safetensors b/model-00121-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0d4b6a7c7265af902b099c4c8b177c051ab2c83d --- /dev/null +++ b/model-00121-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07f2d3d862bbf55ab80f9b79b2e63af6516b7a13d45df56e30bdb9f7b868edbe +size 4999776384 diff --git a/model-00122-of-00193.safetensors b/model-00122-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87d80a9d766f6165b194abd0d0637be974bffc98 --- /dev/null +++ b/model-00122-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f75432f135f1662ffd330eadd67ce0789fd76237468dffc6a29aed845353a9 +size 4697621832 diff --git a/model-00123-of-00193.safetensors b/model-00123-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e83ad94129a8befd8d4aeeaa0a5b12c33dbe5bc --- /dev/null +++ b/model-00123-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a3284bdb5f7c9e5208341867125b116d2749fa57d8771cae0a11ffac1913606 +size 4999776384 diff --git a/model-00124-of-00193.safetensors b/model-00124-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fae2a785ee0a736f4cef2dde69ee83df3a31a8cd --- /dev/null +++ b/model-00124-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a51d5b994357118a00e3274950b327de40be0e5da42befa44bb9115bb9701e +size 4697621832 diff --git a/model-00125-of-00193.safetensors b/model-00125-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b57d10ec288c2ff15ef31f4779307a61e81b9035 --- /dev/null +++ b/model-00125-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d1da9732ee78c7b5d6917dbeacf4f7025295905a18c8f1c11420fc210f8eee +size 4697621832 diff --git a/model-00126-of-00193.safetensors b/model-00126-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4ea2d9063c79a0452136da8655c2547483d704a --- /dev/null +++ b/model-00126-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eeba3eb6cd46054661baa11d0a6b3f0d99f7bcb6d5ded30e3de4c507626b482 +size 4999776376 diff --git a/model-00127-of-00193.safetensors b/model-00127-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d6ea121a5d5a4c5a8c18266e372130c254c4ce94 --- /dev/null +++ b/model-00127-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecf1ebacf941b30a43d46763d92a162de40d9a9a99bb3d6fc13e04c79e901698 +size 4697621832 diff --git a/model-00128-of-00193.safetensors b/model-00128-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9d030c30e229ee1362d31f6a299d9e91c6dcf71 --- /dev/null +++ b/model-00128-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f761766f72ddb53ad000e44333e1c4be0bd64035dcfb674857da8e2e0fc6f320 +size 4999776384 diff --git a/model-00129-of-00193.safetensors b/model-00129-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fe81c6533a0d991c993a37de6e9803db643147ad --- /dev/null +++ b/model-00129-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76b218e1211b0c27f7cc659222a2492d505f9d5ee11e7698fe3997cf85748939 +size 4697621832 diff --git a/model-00130-of-00193.safetensors b/model-00130-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7509f6dbe9424d87b168639a88edf6284c7e615c --- /dev/null +++ b/model-00130-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c409754bfb394dee252df90ecf85fcd196ee7880a24b88ab403c5baab14a9825 +size 4999776384 diff --git a/model-00131-of-00193.safetensors b/model-00131-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25005c75b36839d5936bbf896f302a30d20d52ac --- /dev/null +++ b/model-00131-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e660c41a65f98c2d7130aa54a2a9c0fe44d6c63f60c04e3a8c6d8d2002a973a8 +size 4697621832 diff --git a/model-00132-of-00193.safetensors b/model-00132-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4154343c4375259cddfcc83b0cdbb3d509995188 --- /dev/null +++ b/model-00132-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:110e29ac22893e606f9e9999301166b6743aef30b18f840c1cc3ca340b9f711f +size 4697621832 diff --git a/model-00133-of-00193.safetensors b/model-00133-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0cd30ef6b34f75f92172307f20fc9e7fdba64f1b --- /dev/null +++ b/model-00133-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:925a25a8ed44ca0812cf12214097719c58981951b2b063ca08cfde78116e6768 +size 4999776384 diff --git a/model-00134-of-00193.safetensors b/model-00134-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5877cc1b86a572e914bf5b5683d570eedb47a6d3 --- /dev/null +++ b/model-00134-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:126bb45fdf8626a647123038fff285af8a6dadfaede190ed8194b41d9e9df8a9 +size 4697621832 diff --git a/model-00135-of-00193.safetensors b/model-00135-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf002cc03186363e4d7e4bbe77f3e798161d02a3 --- /dev/null +++ b/model-00135-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ce919ce1288871c0c6522495946bd4c1c473f3515e349846410149e620285fc +size 4999776384 diff --git a/model-00136-of-00193.safetensors b/model-00136-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..741339bd65eb4fe6e6a37621fc40433d6ad6b068 --- /dev/null +++ b/model-00136-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3fc8383cd930cfc5eb4c6843158cd3db4c25e5920f057c8502feb62d54d7c37 +size 4697621832 diff --git a/model-00137-of-00193.safetensors b/model-00137-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdc890becfe0b2e6cd2576d33f1b356f2ce356e4 --- /dev/null +++ b/model-00137-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88eed1cfe1b8de28aa000e907092ef7814f21d462e0d290f38f323fedc9deb45 +size 4697621832 diff --git a/model-00138-of-00193.safetensors b/model-00138-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f217686479cb30a391e2250ce5474c8baedf9f42 --- /dev/null +++ b/model-00138-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a068f18e989eae2b91390225f7373ee9c56e13d7d8ee6f447f4b0bfb387a25f2 +size 4999776376 diff --git a/model-00139-of-00193.safetensors b/model-00139-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acc5ad183cfe6b4e080200dbdc91540e4e8388b1 --- /dev/null +++ b/model-00139-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ec5d0ec8dde7a1248a1cf02e66f347a87b0e141a4e7d8c2e1f2260a3276418 +size 4697621832 diff --git a/model-00140-of-00193.safetensors b/model-00140-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd52f1552a21bbe9facb6af6ac0d49b0fedbaabb --- /dev/null +++ b/model-00140-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10cf4a675aa55b0423bddb98fe31d3d1ebc29f8dd8f9ca364ecc7192c68aa990 +size 4999776384 diff --git a/model-00141-of-00193.safetensors b/model-00141-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bfacc1f93207c2be9d7f365a128aeb2e83d9fb94 --- /dev/null +++ b/model-00141-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4689a3c289cbb3fbad6f0a4bd6b862d5239f7a94c8a5e83b0d14fc20e6fb36e0 +size 4697621832 diff --git a/model-00142-of-00193.safetensors b/model-00142-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf493f191b7a34cfa39af8c9eaedf8c59a2eab0b --- /dev/null +++ b/model-00142-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de525df897fba17317140825d02c6150047befdc29754a07fc7d5dcf421122d +size 4999776384 diff --git a/model-00143-of-00193.safetensors b/model-00143-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f85c0944173f122bcf7c91ef5133ebaa21c64c8 --- /dev/null +++ b/model-00143-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1da5e5f8dfb1183c61b622e2278b0c2f4f74be1e2ff633ebc5b31c5b129e9ca2 +size 4697621832 diff --git a/model-00144-of-00193.safetensors b/model-00144-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..468a419ddc0c289c51c5c804144f6786a5772fa4 --- /dev/null +++ b/model-00144-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd59e15d6317def9b273addc31fee05cc5d4d9caafc5dfa95dc0c3d1c482a174 +size 4697621832 diff --git a/model-00145-of-00193.safetensors b/model-00145-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..497bcd487567bc204b02219ff0187dd74c4522af --- /dev/null +++ b/model-00145-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb0ff8ddf9d45372bb79cc1e1d284c3d6556d4f6fde4856c9e147e2ab46002d6 +size 4999776384 diff --git a/model-00146-of-00193.safetensors b/model-00146-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a2dbe1cb084bb50bdf8fe979c403245cfde628f0 --- /dev/null +++ b/model-00146-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca2d6a93888d951daa9d1b715e4eaacc5423e15cdd464dfed996cde211b7278a +size 4697621832 diff --git a/model-00147-of-00193.safetensors b/model-00147-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..176a065e05156ab4520980a77b7f13fe1355e87e --- /dev/null +++ b/model-00147-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a2059a767a8c18b14f729348f15218521dd2d57648f68179800b3eaca91715 +size 4999776384 diff --git a/model-00148-of-00193.safetensors b/model-00148-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9518f507eeaaf79367613b2187fd6efdbf119f09 --- /dev/null +++ b/model-00148-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90d0d8f31c373756e4de60dbe5d952d6aa590e1abea7619d9c109821d26b6561 +size 4697621832 diff --git a/model-00149-of-00193.safetensors b/model-00149-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc789789e40e7edf51eb2254f13de9813f098b60 --- /dev/null +++ b/model-00149-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4fcab319563045c071bd7f02cdd03baa711959c860a3039bd808bb0ab9cf311 +size 4697621832 diff --git a/model-00150-of-00193.safetensors b/model-00150-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..382efe12709e8f51aad9a36aa58f92a4ae59b540 --- /dev/null +++ b/model-00150-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5da83c29b33ca958ccb04984fbb469874f37f0653733421e52a09678566b0f3 +size 4999776376 diff --git a/model-00151-of-00193.safetensors b/model-00151-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f627f0cd63737f9b2b35af2df47f817173028d7a --- /dev/null +++ b/model-00151-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:444a51010a38794edd52a6bc822fdebcfa2dd84d86a1ffb0d5e2cb60d183de78 +size 4697621832 diff --git a/model-00152-of-00193.safetensors b/model-00152-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0465e4af865570ce437760ffa37000e9897d33d2 --- /dev/null +++ b/model-00152-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bde4309d332177b39cb4c59b77b685edd861d1f920d280c214f1e600e8bcd1de +size 4999776384 diff --git a/model-00153-of-00193.safetensors b/model-00153-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b3d071125ca0d7c620a1603b370430d33b054342 --- /dev/null +++ b/model-00153-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d045926a006646b6a5721c85ee9a716861c1b9aac4c172a9f10c5f6d14525f5 +size 4697621832 diff --git a/model-00154-of-00193.safetensors b/model-00154-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a018d9b4087454763ea805320bb63843a5909d1c --- /dev/null +++ b/model-00154-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7e14ec8337fa2a4ff214e6901bf417889a28a5dafb287fa0f50796ce162c92c +size 4999776384 diff --git a/model-00155-of-00193.safetensors b/model-00155-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f1ce407a46f0da385cc7e95dccad5c0f8a714737 --- /dev/null +++ b/model-00155-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:261762864b33ef1588b2c0c4d23d646bfe077458c8df300538850b4775601e4c +size 4697621832 diff --git a/model-00156-of-00193.safetensors b/model-00156-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..beeb42ca77384ab124b69307bd82780463537b94 --- /dev/null +++ b/model-00156-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b58754d7d13a954822d75ba7938f39de374f4ebe254e1bc3161cb934e698803b +size 4697621832 diff --git a/model-00157-of-00193.safetensors b/model-00157-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b9e565623bdafeb2d541abfc6e989a3828adb02 --- /dev/null +++ b/model-00157-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc95ca083a16e5f48229ecc35793f81b80b34bb25a4acf828c87a83960956c39 +size 4999776384 diff --git a/model-00158-of-00193.safetensors b/model-00158-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6114b4df5d2b0f0227c699edbc7711933dc445f6 --- /dev/null +++ b/model-00158-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bdbafd497e186ead6eeac135bfa7803d1a12a29b6b4198a961542e92972fa5a +size 4697621832 diff --git a/model-00159-of-00193.safetensors b/model-00159-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..477616e2e736fa5ef957f3a6cd42434443d03afb --- /dev/null +++ b/model-00159-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03a0d3f9040c798335509fa71dd561b223bf39e6a1ac6f8f20cfaf85d8f83e9 +size 4999776384 diff --git a/model-00160-of-00193.safetensors b/model-00160-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e878f3b18ec2ca0127a3bf62142f4b94a9d8960 --- /dev/null +++ b/model-00160-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1452a9496aa9b0c12c5e45a1b4ea0b32d4e1310bb9e3d0c01a7579fec566b5ce +size 4697621832 diff --git a/model-00161-of-00193.safetensors b/model-00161-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0283dd343a655ba1d4811922ee186a7b98a6d923 --- /dev/null +++ b/model-00161-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a785ef6e98ef89e0201f7d65429643e786ac68167f574163263987ce0f1fbe0f +size 4697621832 diff --git a/model-00162-of-00193.safetensors b/model-00162-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89da60b7b790d965f593267d91d65cf03e636b5e --- /dev/null +++ b/model-00162-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5ba67ebbc6cabfd9a2b63c22b950440640429a4722335e0e980ef7cdb76ca03 +size 4999776376 diff --git a/model-00163-of-00193.safetensors b/model-00163-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b33b50f9e0d7d7e3e1be721eec36d7da406b7e6 --- /dev/null +++ b/model-00163-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede8434dabd70cb7195dabb6b6620a8d8fcef66cf53e973dc3cd87fd9c5752de +size 4697621832 diff --git a/model-00164-of-00193.safetensors b/model-00164-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f87c9a833c2a0a2e4f479de8dab02eda3c7b304 --- /dev/null +++ b/model-00164-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150f015a41af36ee9c3145678e9ca953ba16f0b53b6ce2a4d39727fbae9547c2 +size 4999776384 diff --git a/model-00165-of-00193.safetensors b/model-00165-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c23e5b51bc33ba0df2a751c669d0f56dca58d9d --- /dev/null +++ b/model-00165-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c27d9aed540fe855dd333b85497e7257c923a3b356709cae2b022909c7dbaee0 +size 4697621832 diff --git a/model-00166-of-00193.safetensors b/model-00166-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea61ecfc766f4054f13ab43264facf6b1b2c807d --- /dev/null +++ b/model-00166-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6579719faf3b1651d1e863b876d5703379146d94cb2d36e0f645fe7fc1acdce +size 4999776384 diff --git a/model-00167-of-00193.safetensors b/model-00167-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..591bcc6974f15ed776b187ae9dd49d8759d6ce66 --- /dev/null +++ b/model-00167-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dd44065971478845f05a9ba7338792cfabaa2b7bb9da1c9f3509b4451bd4de8 +size 4697621832 diff --git a/model-00168-of-00193.safetensors b/model-00168-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1ab6cc0298862474d058510290171debd544bc7 --- /dev/null +++ b/model-00168-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b1328939130e6d5132887563737669b765df64ce60d2e6ba3d7dee70d81db4 +size 4697621832 diff --git a/model-00169-of-00193.safetensors b/model-00169-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b67a7b399dbbb44f1eced4770c0d9a028b9ca3e --- /dev/null +++ b/model-00169-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea110126c92ff7abed12927c59dbf9405afd3e5b3d2f8a926417c301af826dfe +size 4999776384 diff --git a/model-00170-of-00193.safetensors b/model-00170-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f96e494e250d76e131c1ceed00780377844cd06 --- /dev/null +++ b/model-00170-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2de104d5957d20eb75005c6b0370413a8b362dd2d206b17a25ece9e64a50d5 +size 4697621832 diff --git a/model-00171-of-00193.safetensors b/model-00171-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f5548212ed8ab7b4c130f4174f736a505ddc846 --- /dev/null +++ b/model-00171-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca45a4797f9d6622d57d9f88e84a39d43b65b4b9f8e0dbc3e29c0740312f6028 +size 4999776384 diff --git a/model-00172-of-00193.safetensors b/model-00172-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3eb014b7b0da718232d6770b0a1294fbaabd8bd --- /dev/null +++ b/model-00172-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea725bcbfffb34a271e647db518e4f1507efdb30c997f2ef2b631e0f5914e5e +size 4697621832 diff --git a/model-00173-of-00193.safetensors b/model-00173-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ae0688fe9a0d8c315b7a7c13f30b98d617339c77 --- /dev/null +++ b/model-00173-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d7030d3ad2e0024b1921e9d6c7b6a40ccad4351f91aeba61755b25919014c1 +size 4697621832 diff --git a/model-00174-of-00193.safetensors b/model-00174-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8629cb4a2f706375ad499b106bfa3a67fda5b4d4 --- /dev/null +++ b/model-00174-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d1c690afb97e86a95590cbf23b3ed7af88e40470fc5f8a94b1d20c161a33f6 +size 4999776376 diff --git a/model-00175-of-00193.safetensors b/model-00175-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4eae12449a381c8a6a88b7cfeae5c44d4c606a85 --- /dev/null +++ b/model-00175-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d28138a0069504752efa3f608a7ee42e8dc2959d0073b518122f50ac8baf5bf +size 4697621832 diff --git a/model-00176-of-00193.safetensors b/model-00176-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6afe7a2a1f7ceed7452bb5d4006e2a0aefc79554 --- /dev/null +++ b/model-00176-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0068977aef54a5f418b76edf25d212b4763dcc98ca9368bf743bd392e04f3275 +size 4999776384 diff --git a/model-00177-of-00193.safetensors b/model-00177-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b0805768bb8e71887960b60d44221e53eb7648b --- /dev/null +++ b/model-00177-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53b8d81993e35d0f2d3ed8f32039ff2be71d81be73d27ea0dad8e4926585ca17 +size 4697621832 diff --git a/model-00178-of-00193.safetensors b/model-00178-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b7d51f984db1e5b930c7008809af5f83b598a52 --- /dev/null +++ b/model-00178-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c47d29e32e0a3f11ebdb903eb6fb02b812679d1e1c429195a63b5fd14a0b98c +size 4999776384 diff --git a/model-00179-of-00193.safetensors b/model-00179-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c33bf6039ba088515f05b07c461ed9e9159c693e --- /dev/null +++ b/model-00179-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6864c1e6ee363834ff838b68827965583d90086758ad5e6243c6b3df86843b5b +size 4697621832 diff --git a/model-00180-of-00193.safetensors b/model-00180-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..136287d31a0262637b71e4d2933ab6f50cfe4c3e --- /dev/null +++ b/model-00180-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf8ef963b07f158a4189a1070ed2a02404884ebc9f8d9c1723bd9da4d2a215d +size 4697621832 diff --git a/model-00181-of-00193.safetensors b/model-00181-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e04d65093ffd593ca4c85f720da5e88de1e3b77c --- /dev/null +++ b/model-00181-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3336066e10f5b54758a0039f396ce589c407c88b55454a31ccfdd1b44cbd71c +size 4999776384 diff --git a/model-00182-of-00193.safetensors b/model-00182-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..621fcdd28e05b6d55631a9b2efff71dc682c7356 --- /dev/null +++ b/model-00182-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0169e473f87e2da7ecd2678abbf6f7cb68ccae0adf064d9e32b389e0bf050d4e +size 4697621832 diff --git a/model-00183-of-00193.safetensors b/model-00183-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3bfea5c83331b8c96ac0d2faa1d94a02b154fc44 --- /dev/null +++ b/model-00183-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c8d3a5903f4d2a997c5de29c6b8b6aa78e685f004c685c60be02271e0a9f30e +size 4999776384 diff --git a/model-00184-of-00193.safetensors b/model-00184-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78cade2b6d997bdc6fb8b5056205671288b7dd86 --- /dev/null +++ b/model-00184-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fabfa52e7bf7954bb0376a507f823cc947d821a27d2ad852e3c09d5c85bad310 +size 4697621832 diff --git a/model-00185-of-00193.safetensors b/model-00185-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6dadce1b372d50363ce8f21b0b6d3b7604a844dd --- /dev/null +++ b/model-00185-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f87ba64d02c5cf134f68f06fd68543d64119388a53f7d5f318911c1cf9a9b3e +size 4697621832 diff --git a/model-00186-of-00193.safetensors b/model-00186-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8068d98412004ff3aa98a4503ebacc8a74eeee44 --- /dev/null +++ b/model-00186-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff7e86b5f06044ee16861c9ca64558b3750fdf85516ffe079c3d5e01d7ceccc +size 4999776376 diff --git a/model-00187-of-00193.safetensors b/model-00187-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d4d3c8eb976491c69e8d79d54abcc90418d9906 --- /dev/null +++ b/model-00187-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f4a6658e76322c3cce8100a78e10887dc164d6a45cda22caa70b1d0b8c13df5 +size 4697621832 diff --git a/model-00188-of-00193.safetensors b/model-00188-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56e22353620b61a59b70a5aca3e54a5184a24d3b --- /dev/null +++ b/model-00188-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1114e67b5529b00f45efae2a23ca753a7db9229ebeab23ba0e0e3ed2117cac6c +size 4999776384 diff --git a/model-00189-of-00193.safetensors b/model-00189-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7fb36da8fe16554147fa6fde31d1389bb7d5cb0a --- /dev/null +++ b/model-00189-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a92dddcbad6c4e90d2756dd476d66d50ac42a208dd667fd878939806bb6f637 +size 4697621832 diff --git a/model-00190-of-00193.safetensors b/model-00190-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc536f6d8e281c8898ac040d44d78fff34062971 --- /dev/null +++ b/model-00190-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fef5e43e2f1c4b203feae387e6c9c44e67aeb1313c8ca875955727538083e74e +size 4999776384 diff --git a/model-00191-of-00193.safetensors b/model-00191-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0f8fa53436def7b976f5c82740ba6e33a15b8e1 --- /dev/null +++ b/model-00191-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa456d45ff800e87b0a2fed7993dcd0febaed70a2de1f5dc3987ade4403d5cb +size 4697621832 diff --git a/model-00192-of-00193.safetensors b/model-00192-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..25e7c75d6f4981ef586e5b70a1c10806dcf9924d --- /dev/null +++ b/model-00192-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba83361fd13982e136d787838f88ee3fe72aba6021f50b11dc63e0e8c285bff +size 4697621832 diff --git a/model-00193-of-00193.safetensors b/model-00193-of-00193.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6968b90d4f7765f1704ea213d47fd91ac442d36 --- /dev/null +++ b/model-00193-of-00193.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd5df7cfbc689f09a98e6f21ce3f8e5a3148135a32d1287ce6808f8a2aacab40 +size 3556819928 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..697fb2e6d8d7810996873a10f8e09cd5f9faba1f --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,2490 @@ +{ + "metadata": { + "total_size": 929470889984 + }, + "weight_map": { + "lm_head.weight": "model-00193-of-00193.safetensors", + "model.embed_tokens.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00193.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00193.safetensors", + "model.layers.0.input_layernorm.weight": "model-00003-of-00193.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00193.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00193.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00193.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00193.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00193.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00193.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00003-of-00193.safetensors", + "model.layers.1.input_layernorm.weight": "model-00006-of-00193.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00006-of-00193.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00193.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00193.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00193.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00193.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00025-of-00193.safetensors", + "model.layers.10.input_layernorm.weight": "model-00027-of-00193.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00027-of-00193.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00025-of-00193.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00025-of-00193.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00025-of-00193.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00025-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00193.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00193.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00027-of-00193.safetensors", + "model.layers.11.input_layernorm.weight": "model-00030-of-00193.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00030-of-00193.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00027-of-00193.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00027-of-00193.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00027-of-00193.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00027-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00193.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00193.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00030-of-00193.safetensors", + "model.layers.12.input_layernorm.weight": "model-00032-of-00193.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00032-of-00193.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00030-of-00193.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00030-of-00193.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00030-of-00193.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00030-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00193.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00032-of-00193.safetensors", + "model.layers.13.input_layernorm.weight": "model-00034-of-00193.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00034-of-00193.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00032-of-00193.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00032-of-00193.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00032-of-00193.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00032-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00193.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00193.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00034-of-00193.safetensors", + "model.layers.14.input_layernorm.weight": "model-00037-of-00193.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00037-of-00193.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00034-of-00193.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00034-of-00193.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00034-of-00193.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00034-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00193.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00037-of-00193.safetensors", + "model.layers.15.input_layernorm.weight": "model-00039-of-00193.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00039-of-00193.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00037-of-00193.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00037-of-00193.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00037-of-00193.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00037-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00193.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00193.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00039-of-00193.safetensors", + "model.layers.16.input_layernorm.weight": "model-00042-of-00193.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00042-of-00193.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00039-of-00193.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00039-of-00193.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00039-of-00193.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00039-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00193.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00193.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00042-of-00193.safetensors", + "model.layers.17.input_layernorm.weight": "model-00044-of-00193.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00044-of-00193.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00042-of-00193.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00042-of-00193.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00042-of-00193.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00042-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00193.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00044-of-00193.safetensors", + "model.layers.18.input_layernorm.weight": "model-00046-of-00193.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00046-of-00193.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00044-of-00193.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00044-of-00193.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00044-of-00193.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00044-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00193.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00193.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00046-of-00193.safetensors", + "model.layers.19.input_layernorm.weight": "model-00049-of-00193.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00049-of-00193.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00046-of-00193.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00046-of-00193.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00046-of-00193.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00046-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00193.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00193.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00006-of-00193.safetensors", + "model.layers.2.input_layernorm.weight": "model-00008-of-00193.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00008-of-00193.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00006-of-00193.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00006-of-00193.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00006-of-00193.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00006-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00193.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00049-of-00193.safetensors", + "model.layers.20.input_layernorm.weight": "model-00051-of-00193.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00051-of-00193.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00049-of-00193.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00049-of-00193.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00049-of-00193.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00049-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00193.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00193.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00051-of-00193.safetensors", + "model.layers.21.input_layernorm.weight": "model-00054-of-00193.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00054-of-00193.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00051-of-00193.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00051-of-00193.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00051-of-00193.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00051-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00193.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00056-of-00193.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00054-of-00193.safetensors", + "model.layers.22.input_layernorm.weight": "model-00056-of-00193.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00056-of-00193.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00054-of-00193.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00054-of-00193.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00054-of-00193.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00054-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00193.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00056-of-00193.safetensors", + "model.layers.23.input_layernorm.weight": "model-00058-of-00193.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00058-of-00193.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00056-of-00193.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00056-of-00193.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00056-of-00193.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00056-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00059-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00060-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00193.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00193.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00058-of-00193.safetensors", + "model.layers.24.input_layernorm.weight": "model-00061-of-00193.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00061-of-00193.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00058-of-00193.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00058-of-00193.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00058-of-00193.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00058-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00061-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00061-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00061-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00061-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00061-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00061-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00062-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00063-of-00193.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00061-of-00193.safetensors", + "model.layers.25.input_layernorm.weight": "model-00063-of-00193.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00063-of-00193.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00061-of-00193.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00061-of-00193.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00061-of-00193.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00061-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00063-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00063-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00064-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00065-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00066-of-00193.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00066-of-00193.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00063-of-00193.safetensors", + "model.layers.26.input_layernorm.weight": "model-00066-of-00193.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00066-of-00193.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00063-of-00193.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00063-of-00193.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00063-of-00193.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00063-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00066-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00068-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00068-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00068-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00068-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00068-of-00193.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00068-of-00193.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00066-of-00193.safetensors", + "model.layers.27.input_layernorm.weight": "model-00068-of-00193.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00068-of-00193.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00066-of-00193.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00066-of-00193.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00066-of-00193.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00066-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00070-of-00193.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00068-of-00193.safetensors", + "model.layers.28.input_layernorm.weight": "model-00070-of-00193.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00070-of-00193.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00068-of-00193.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00068-of-00193.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00068-of-00193.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00068-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00071-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00072-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00193.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00193.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00070-of-00193.safetensors", + "model.layers.29.input_layernorm.weight": "model-00073-of-00193.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00073-of-00193.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00070-of-00193.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00070-of-00193.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00070-of-00193.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00070-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00193.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00008-of-00193.safetensors", + "model.layers.3.input_layernorm.weight": "model-00010-of-00193.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00010-of-00193.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00008-of-00193.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00008-of-00193.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00008-of-00193.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00008-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00073-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00073-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00073-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00073-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00073-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00073-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00074-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00075-of-00193.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00073-of-00193.safetensors", + "model.layers.30.input_layernorm.weight": "model-00075-of-00193.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00075-of-00193.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00073-of-00193.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00073-of-00193.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00073-of-00193.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00073-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00075-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00075-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00076-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00077-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00078-of-00193.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00078-of-00193.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00075-of-00193.safetensors", + "model.layers.31.input_layernorm.weight": "model-00078-of-00193.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00078-of-00193.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00075-of-00193.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00075-of-00193.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00075-of-00193.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00075-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00078-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00080-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00080-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00080-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00080-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00080-of-00193.safetensors", + "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00080-of-00193.safetensors", + "model.layers.32.block_sparse_moe.gate.weight": "model-00078-of-00193.safetensors", + "model.layers.32.input_layernorm.weight": "model-00080-of-00193.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00080-of-00193.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00078-of-00193.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00078-of-00193.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00078-of-00193.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00078-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00193.safetensors", + "model.layers.33.block_sparse_moe.gate.weight": "model-00080-of-00193.safetensors", + "model.layers.33.input_layernorm.weight": "model-00082-of-00193.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00082-of-00193.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00080-of-00193.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00080-of-00193.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00080-of-00193.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00080-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00083-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00084-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00085-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00085-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00085-of-00193.safetensors", + "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00085-of-00193.safetensors", + "model.layers.34.block_sparse_moe.gate.weight": "model-00082-of-00193.safetensors", + "model.layers.34.input_layernorm.weight": "model-00085-of-00193.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00085-of-00193.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00082-of-00193.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00082-of-00193.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00082-of-00193.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00082-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00085-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00085-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00085-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00085-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00085-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00085-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00086-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00087-of-00193.safetensors", + "model.layers.35.block_sparse_moe.gate.weight": "model-00085-of-00193.safetensors", + "model.layers.35.input_layernorm.weight": "model-00087-of-00193.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00087-of-00193.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00085-of-00193.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00085-of-00193.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00085-of-00193.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00085-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00087-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00087-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00088-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00089-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00090-of-00193.safetensors", + "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00090-of-00193.safetensors", + "model.layers.36.block_sparse_moe.gate.weight": "model-00087-of-00193.safetensors", + "model.layers.36.input_layernorm.weight": "model-00090-of-00193.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00090-of-00193.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00087-of-00193.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00087-of-00193.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00087-of-00193.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00087-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00090-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00091-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00092-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00092-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00092-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00092-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00092-of-00193.safetensors", + "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00092-of-00193.safetensors", + "model.layers.37.block_sparse_moe.gate.weight": "model-00090-of-00193.safetensors", + "model.layers.37.input_layernorm.weight": "model-00092-of-00193.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00092-of-00193.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00090-of-00193.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00090-of-00193.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00090-of-00193.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00090-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00092-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00093-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00193.safetensors", + "model.layers.38.block_sparse_moe.gate.weight": "model-00092-of-00193.safetensors", + "model.layers.38.input_layernorm.weight": "model-00094-of-00193.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00094-of-00193.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00092-of-00193.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00092-of-00193.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00092-of-00193.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00092-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00096-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00097-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00097-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00097-of-00193.safetensors", + "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00097-of-00193.safetensors", + "model.layers.39.block_sparse_moe.gate.weight": "model-00094-of-00193.safetensors", + "model.layers.39.input_layernorm.weight": "model-00097-of-00193.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00097-of-00193.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00094-of-00193.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00094-of-00193.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00094-of-00193.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00094-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00193.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00193.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00010-of-00193.safetensors", + "model.layers.4.input_layernorm.weight": "model-00013-of-00193.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00013-of-00193.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00010-of-00193.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00010-of-00193.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00010-of-00193.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00010-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00097-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00097-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00097-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00097-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00097-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00097-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00098-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00099-of-00193.safetensors", + "model.layers.40.block_sparse_moe.gate.weight": "model-00097-of-00193.safetensors", + "model.layers.40.input_layernorm.weight": "model-00099-of-00193.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00099-of-00193.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00097-of-00193.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00097-of-00193.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00097-of-00193.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00097-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00099-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00099-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00100-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00101-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00102-of-00193.safetensors", + "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00102-of-00193.safetensors", + "model.layers.41.block_sparse_moe.gate.weight": "model-00099-of-00193.safetensors", + "model.layers.41.input_layernorm.weight": "model-00102-of-00193.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00102-of-00193.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00099-of-00193.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00099-of-00193.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00099-of-00193.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00099-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00102-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00103-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00104-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00104-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00104-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00104-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00104-of-00193.safetensors", + "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00104-of-00193.safetensors", + "model.layers.42.block_sparse_moe.gate.weight": "model-00102-of-00193.safetensors", + "model.layers.42.input_layernorm.weight": "model-00104-of-00193.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00104-of-00193.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00102-of-00193.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00102-of-00193.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00102-of-00193.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00102-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00104-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00104-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00104-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00104-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00105-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00106-of-00193.safetensors", + "model.layers.43.block_sparse_moe.gate.weight": "model-00104-of-00193.safetensors", + "model.layers.43.input_layernorm.weight": "model-00106-of-00193.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00106-of-00193.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00104-of-00193.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00104-of-00193.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00104-of-00193.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00104-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00107-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00108-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00109-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00109-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00109-of-00193.safetensors", + "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00109-of-00193.safetensors", + "model.layers.44.block_sparse_moe.gate.weight": "model-00106-of-00193.safetensors", + "model.layers.44.input_layernorm.weight": "model-00109-of-00193.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00109-of-00193.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00106-of-00193.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00106-of-00193.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00106-of-00193.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00106-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00109-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00109-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00109-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00109-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00109-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00109-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00110-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00111-of-00193.safetensors", + "model.layers.45.block_sparse_moe.gate.weight": "model-00109-of-00193.safetensors", + "model.layers.45.input_layernorm.weight": "model-00111-of-00193.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00111-of-00193.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00109-of-00193.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00109-of-00193.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00109-of-00193.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00109-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00111-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00111-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00112-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00113-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00114-of-00193.safetensors", + "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00114-of-00193.safetensors", + "model.layers.46.block_sparse_moe.gate.weight": "model-00111-of-00193.safetensors", + "model.layers.46.input_layernorm.weight": "model-00114-of-00193.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00114-of-00193.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00111-of-00193.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00111-of-00193.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00111-of-00193.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00111-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00114-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00115-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00116-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00116-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00116-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00116-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00116-of-00193.safetensors", + "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00116-of-00193.safetensors", + "model.layers.47.block_sparse_moe.gate.weight": "model-00114-of-00193.safetensors", + "model.layers.47.input_layernorm.weight": "model-00116-of-00193.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00116-of-00193.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00114-of-00193.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00114-of-00193.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00114-of-00193.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00114-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00116-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00116-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00116-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00116-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00117-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00118-of-00193.safetensors", + "model.layers.48.block_sparse_moe.gate.weight": "model-00116-of-00193.safetensors", + "model.layers.48.input_layernorm.weight": "model-00118-of-00193.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00118-of-00193.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00116-of-00193.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00116-of-00193.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00116-of-00193.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00116-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00119-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00120-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00121-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00121-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00121-of-00193.safetensors", + "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00121-of-00193.safetensors", + "model.layers.49.block_sparse_moe.gate.weight": "model-00118-of-00193.safetensors", + "model.layers.49.input_layernorm.weight": "model-00121-of-00193.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00121-of-00193.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00118-of-00193.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00118-of-00193.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00118-of-00193.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00118-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00193.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00013-of-00193.safetensors", + "model.layers.5.input_layernorm.weight": "model-00015-of-00193.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00015-of-00193.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00013-of-00193.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00013-of-00193.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00013-of-00193.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00013-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00121-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00121-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00121-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00121-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00121-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00121-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00122-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00123-of-00193.safetensors", + "model.layers.50.block_sparse_moe.gate.weight": "model-00121-of-00193.safetensors", + "model.layers.50.input_layernorm.weight": "model-00123-of-00193.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00123-of-00193.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00121-of-00193.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00121-of-00193.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00121-of-00193.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00121-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00123-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00123-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00124-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00125-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00126-of-00193.safetensors", + "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00126-of-00193.safetensors", + "model.layers.51.block_sparse_moe.gate.weight": "model-00123-of-00193.safetensors", + "model.layers.51.input_layernorm.weight": "model-00126-of-00193.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00126-of-00193.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00123-of-00193.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00123-of-00193.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00123-of-00193.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00123-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00126-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00127-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00128-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00128-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00128-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00128-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00128-of-00193.safetensors", + "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00128-of-00193.safetensors", + "model.layers.52.block_sparse_moe.gate.weight": "model-00126-of-00193.safetensors", + "model.layers.52.input_layernorm.weight": "model-00128-of-00193.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00128-of-00193.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00126-of-00193.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00126-of-00193.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00126-of-00193.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00126-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00128-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00128-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00128-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00128-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00129-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00130-of-00193.safetensors", + "model.layers.53.block_sparse_moe.gate.weight": "model-00128-of-00193.safetensors", + "model.layers.53.input_layernorm.weight": "model-00130-of-00193.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00130-of-00193.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00128-of-00193.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00128-of-00193.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00128-of-00193.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00128-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00131-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00132-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00133-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00133-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00133-of-00193.safetensors", + "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00133-of-00193.safetensors", + "model.layers.54.block_sparse_moe.gate.weight": "model-00130-of-00193.safetensors", + "model.layers.54.input_layernorm.weight": "model-00133-of-00193.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00133-of-00193.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00130-of-00193.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00130-of-00193.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00130-of-00193.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00130-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00133-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00133-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00133-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00133-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00133-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00133-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00134-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00135-of-00193.safetensors", + "model.layers.55.block_sparse_moe.gate.weight": "model-00133-of-00193.safetensors", + "model.layers.55.input_layernorm.weight": "model-00135-of-00193.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00135-of-00193.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00133-of-00193.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00133-of-00193.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00133-of-00193.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00133-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w1.weight": "model-00135-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w2.weight": "model-00135-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.0.w3.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w1.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w2.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.1.w3.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.2.w1.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.2.w2.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.2.w3.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.3.w1.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.3.w2.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.3.w3.weight": "model-00136-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.4.w1.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.4.w2.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.4.w3.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.5.w1.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.5.w2.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.5.w3.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.6.w1.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.6.w2.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.6.w3.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.7.w1.weight": "model-00137-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.7.w2.weight": "model-00138-of-00193.safetensors", + "model.layers.56.block_sparse_moe.experts.7.w3.weight": "model-00138-of-00193.safetensors", + "model.layers.56.block_sparse_moe.gate.weight": "model-00135-of-00193.safetensors", + "model.layers.56.input_layernorm.weight": "model-00138-of-00193.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00138-of-00193.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00135-of-00193.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00135-of-00193.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00135-of-00193.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00135-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w1.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w2.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.0.w3.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w1.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w2.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.1.w3.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.2.w1.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.2.w2.weight": "model-00138-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.2.w3.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.3.w1.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.3.w2.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.3.w3.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.4.w1.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.4.w2.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.4.w3.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.5.w1.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.5.w2.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.5.w3.weight": "model-00139-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.6.w1.weight": "model-00140-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.6.w2.weight": "model-00140-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.6.w3.weight": "model-00140-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.7.w1.weight": "model-00140-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.7.w2.weight": "model-00140-of-00193.safetensors", + "model.layers.57.block_sparse_moe.experts.7.w3.weight": "model-00140-of-00193.safetensors", + "model.layers.57.block_sparse_moe.gate.weight": "model-00138-of-00193.safetensors", + "model.layers.57.input_layernorm.weight": "model-00140-of-00193.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00140-of-00193.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00138-of-00193.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00138-of-00193.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00138-of-00193.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00138-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w1.weight": "model-00140-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w2.weight": "model-00140-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.0.w3.weight": "model-00140-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w1.weight": "model-00140-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w2.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.1.w3.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.2.w1.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.2.w2.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.2.w3.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.3.w1.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.3.w2.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.3.w3.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.4.w1.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.4.w2.weight": "model-00141-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.4.w3.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.5.w1.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.5.w2.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.5.w3.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.6.w1.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.6.w2.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.6.w3.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.7.w1.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.7.w2.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.experts.7.w3.weight": "model-00142-of-00193.safetensors", + "model.layers.58.block_sparse_moe.gate.weight": "model-00140-of-00193.safetensors", + "model.layers.58.input_layernorm.weight": "model-00142-of-00193.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00142-of-00193.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00140-of-00193.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00140-of-00193.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00140-of-00193.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00140-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w1.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w2.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.0.w3.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w1.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w2.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.1.w3.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.2.w1.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.2.w2.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.2.w3.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.3.w1.weight": "model-00143-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.3.w2.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.3.w3.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.4.w1.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.4.w2.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.4.w3.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.5.w1.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.5.w2.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.5.w3.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.6.w1.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.6.w2.weight": "model-00144-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.6.w3.weight": "model-00145-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.7.w1.weight": "model-00145-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.7.w2.weight": "model-00145-of-00193.safetensors", + "model.layers.59.block_sparse_moe.experts.7.w3.weight": "model-00145-of-00193.safetensors", + "model.layers.59.block_sparse_moe.gate.weight": "model-00142-of-00193.safetensors", + "model.layers.59.input_layernorm.weight": "model-00145-of-00193.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00145-of-00193.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00142-of-00193.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00142-of-00193.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00142-of-00193.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00142-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00193.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00193.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00015-of-00193.safetensors", + "model.layers.6.input_layernorm.weight": "model-00018-of-00193.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00018-of-00193.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00015-of-00193.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00015-of-00193.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00015-of-00193.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00015-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.0.w1.weight": "model-00145-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.0.w2.weight": "model-00145-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.0.w3.weight": "model-00145-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.1.w1.weight": "model-00145-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.1.w2.weight": "model-00145-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.1.w3.weight": "model-00145-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.2.w1.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.2.w2.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.2.w3.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.3.w1.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.3.w2.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.3.w3.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.4.w1.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.4.w2.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.4.w3.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.5.w1.weight": "model-00146-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.5.w2.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.5.w3.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.6.w1.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.6.w2.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.6.w3.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.7.w1.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.7.w2.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.experts.7.w3.weight": "model-00147-of-00193.safetensors", + "model.layers.60.block_sparse_moe.gate.weight": "model-00145-of-00193.safetensors", + "model.layers.60.input_layernorm.weight": "model-00147-of-00193.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00147-of-00193.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00145-of-00193.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00145-of-00193.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00145-of-00193.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00145-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.0.w1.weight": "model-00147-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.0.w2.weight": "model-00147-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.0.w3.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.1.w1.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.1.w2.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.1.w3.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.2.w1.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.2.w2.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.2.w3.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.3.w1.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.3.w2.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.3.w3.weight": "model-00148-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.4.w1.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.4.w2.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.4.w3.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.5.w1.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.5.w2.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.5.w3.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.6.w1.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.6.w2.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.6.w3.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.7.w1.weight": "model-00149-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.7.w2.weight": "model-00150-of-00193.safetensors", + "model.layers.61.block_sparse_moe.experts.7.w3.weight": "model-00150-of-00193.safetensors", + "model.layers.61.block_sparse_moe.gate.weight": "model-00147-of-00193.safetensors", + "model.layers.61.input_layernorm.weight": "model-00150-of-00193.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00150-of-00193.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00147-of-00193.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00147-of-00193.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00147-of-00193.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00147-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.0.w1.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.0.w2.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.0.w3.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.1.w1.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.1.w2.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.1.w3.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.2.w1.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.2.w2.weight": "model-00150-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.2.w3.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.3.w1.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.3.w2.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.3.w3.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.4.w1.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.4.w2.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.4.w3.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.5.w1.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.5.w2.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.5.w3.weight": "model-00151-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.6.w1.weight": "model-00152-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.6.w2.weight": "model-00152-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.6.w3.weight": "model-00152-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.7.w1.weight": "model-00152-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.7.w2.weight": "model-00152-of-00193.safetensors", + "model.layers.62.block_sparse_moe.experts.7.w3.weight": "model-00152-of-00193.safetensors", + "model.layers.62.block_sparse_moe.gate.weight": "model-00150-of-00193.safetensors", + "model.layers.62.input_layernorm.weight": "model-00152-of-00193.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00152-of-00193.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00150-of-00193.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00150-of-00193.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00150-of-00193.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00150-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.0.w1.weight": "model-00152-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.0.w2.weight": "model-00152-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.0.w3.weight": "model-00152-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.1.w1.weight": "model-00152-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.1.w2.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.1.w3.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.2.w1.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.2.w2.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.2.w3.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.3.w1.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.3.w2.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.3.w3.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.4.w1.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.4.w2.weight": "model-00153-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.4.w3.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.5.w1.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.5.w2.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.5.w3.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.6.w1.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.6.w2.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.6.w3.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.7.w1.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.7.w2.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.experts.7.w3.weight": "model-00154-of-00193.safetensors", + "model.layers.63.block_sparse_moe.gate.weight": "model-00152-of-00193.safetensors", + "model.layers.63.input_layernorm.weight": "model-00154-of-00193.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00154-of-00193.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00152-of-00193.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00152-of-00193.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00152-of-00193.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00152-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.0.w1.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.0.w2.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.0.w3.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.1.w1.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.1.w2.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.1.w3.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.2.w1.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.2.w2.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.2.w3.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.3.w1.weight": "model-00155-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.3.w2.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.3.w3.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.4.w1.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.4.w2.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.4.w3.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.5.w1.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.5.w2.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.5.w3.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.6.w1.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.6.w2.weight": "model-00156-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.6.w3.weight": "model-00157-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.7.w1.weight": "model-00157-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.7.w2.weight": "model-00157-of-00193.safetensors", + "model.layers.64.block_sparse_moe.experts.7.w3.weight": "model-00157-of-00193.safetensors", + "model.layers.64.block_sparse_moe.gate.weight": "model-00154-of-00193.safetensors", + "model.layers.64.input_layernorm.weight": "model-00157-of-00193.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00157-of-00193.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00154-of-00193.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00154-of-00193.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00154-of-00193.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00154-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.0.w1.weight": "model-00157-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.0.w2.weight": "model-00157-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.0.w3.weight": "model-00157-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.1.w1.weight": "model-00157-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.1.w2.weight": "model-00157-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.1.w3.weight": "model-00157-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.2.w1.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.2.w2.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.2.w3.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.3.w1.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.3.w2.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.3.w3.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.4.w1.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.4.w2.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.4.w3.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.5.w1.weight": "model-00158-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.5.w2.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.5.w3.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.6.w1.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.6.w2.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.6.w3.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.7.w1.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.7.w2.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.experts.7.w3.weight": "model-00159-of-00193.safetensors", + "model.layers.65.block_sparse_moe.gate.weight": "model-00157-of-00193.safetensors", + "model.layers.65.input_layernorm.weight": "model-00159-of-00193.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00159-of-00193.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00157-of-00193.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00157-of-00193.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00157-of-00193.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00157-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.0.w1.weight": "model-00159-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.0.w2.weight": "model-00159-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.0.w3.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.1.w1.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.1.w2.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.1.w3.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.2.w1.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.2.w2.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.2.w3.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.3.w1.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.3.w2.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.3.w3.weight": "model-00160-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.4.w1.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.4.w2.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.4.w3.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.5.w1.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.5.w2.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.5.w3.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.6.w1.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.6.w2.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.6.w3.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.7.w1.weight": "model-00161-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.7.w2.weight": "model-00162-of-00193.safetensors", + "model.layers.66.block_sparse_moe.experts.7.w3.weight": "model-00162-of-00193.safetensors", + "model.layers.66.block_sparse_moe.gate.weight": "model-00159-of-00193.safetensors", + "model.layers.66.input_layernorm.weight": "model-00162-of-00193.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00162-of-00193.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00159-of-00193.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00159-of-00193.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00159-of-00193.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00159-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.0.w1.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.0.w2.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.0.w3.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.1.w1.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.1.w2.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.1.w3.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.2.w1.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.2.w2.weight": "model-00162-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.2.w3.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.3.w1.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.3.w2.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.3.w3.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.4.w1.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.4.w2.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.4.w3.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.5.w1.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.5.w2.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.5.w3.weight": "model-00163-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.6.w1.weight": "model-00164-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.6.w2.weight": "model-00164-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.6.w3.weight": "model-00164-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.7.w1.weight": "model-00164-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.7.w2.weight": "model-00164-of-00193.safetensors", + "model.layers.67.block_sparse_moe.experts.7.w3.weight": "model-00164-of-00193.safetensors", + "model.layers.67.block_sparse_moe.gate.weight": "model-00162-of-00193.safetensors", + "model.layers.67.input_layernorm.weight": "model-00164-of-00193.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00164-of-00193.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00162-of-00193.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00162-of-00193.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00162-of-00193.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00162-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.0.w1.weight": "model-00164-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.0.w2.weight": "model-00164-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.0.w3.weight": "model-00164-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.1.w1.weight": "model-00164-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.1.w2.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.1.w3.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.2.w1.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.2.w2.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.2.w3.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.3.w1.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.3.w2.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.3.w3.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.4.w1.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.4.w2.weight": "model-00165-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.4.w3.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.5.w1.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.5.w2.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.5.w3.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.6.w1.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.6.w2.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.6.w3.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.7.w1.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.7.w2.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.experts.7.w3.weight": "model-00166-of-00193.safetensors", + "model.layers.68.block_sparse_moe.gate.weight": "model-00164-of-00193.safetensors", + "model.layers.68.input_layernorm.weight": "model-00166-of-00193.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00166-of-00193.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00164-of-00193.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00164-of-00193.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00164-of-00193.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00164-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.0.w1.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.0.w2.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.0.w3.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.1.w1.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.1.w2.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.1.w3.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.2.w1.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.2.w2.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.2.w3.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.3.w1.weight": "model-00167-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.3.w2.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.3.w3.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.4.w1.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.4.w2.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.4.w3.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.5.w1.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.5.w2.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.5.w3.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.6.w1.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.6.w2.weight": "model-00168-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.6.w3.weight": "model-00169-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.7.w1.weight": "model-00169-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.7.w2.weight": "model-00169-of-00193.safetensors", + "model.layers.69.block_sparse_moe.experts.7.w3.weight": "model-00169-of-00193.safetensors", + "model.layers.69.block_sparse_moe.gate.weight": "model-00166-of-00193.safetensors", + "model.layers.69.input_layernorm.weight": "model-00169-of-00193.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00169-of-00193.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00166-of-00193.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00166-of-00193.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00166-of-00193.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00166-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00193.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00193.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00018-of-00193.safetensors", + "model.layers.7.input_layernorm.weight": "model-00020-of-00193.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00020-of-00193.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00018-of-00193.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00018-of-00193.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00018-of-00193.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00018-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.0.w1.weight": "model-00169-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.0.w2.weight": "model-00169-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.0.w3.weight": "model-00169-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.1.w1.weight": "model-00169-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.1.w2.weight": "model-00169-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.1.w3.weight": "model-00169-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.2.w1.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.2.w2.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.2.w3.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.3.w1.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.3.w2.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.3.w3.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.4.w1.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.4.w2.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.4.w3.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.5.w1.weight": "model-00170-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.5.w2.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.5.w3.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.6.w1.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.6.w2.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.6.w3.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.7.w1.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.7.w2.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.experts.7.w3.weight": "model-00171-of-00193.safetensors", + "model.layers.70.block_sparse_moe.gate.weight": "model-00169-of-00193.safetensors", + "model.layers.70.input_layernorm.weight": "model-00171-of-00193.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00171-of-00193.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00169-of-00193.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00169-of-00193.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00169-of-00193.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00169-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.0.w1.weight": "model-00171-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.0.w2.weight": "model-00171-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.0.w3.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.1.w1.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.1.w2.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.1.w3.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.2.w1.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.2.w2.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.2.w3.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.3.w1.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.3.w2.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.3.w3.weight": "model-00172-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.4.w1.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.4.w2.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.4.w3.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.5.w1.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.5.w2.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.5.w3.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.6.w1.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.6.w2.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.6.w3.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.7.w1.weight": "model-00173-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.7.w2.weight": "model-00174-of-00193.safetensors", + "model.layers.71.block_sparse_moe.experts.7.w3.weight": "model-00174-of-00193.safetensors", + "model.layers.71.block_sparse_moe.gate.weight": "model-00171-of-00193.safetensors", + "model.layers.71.input_layernorm.weight": "model-00174-of-00193.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00174-of-00193.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00171-of-00193.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00171-of-00193.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00171-of-00193.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00171-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.0.w1.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.0.w2.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.0.w3.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.1.w1.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.1.w2.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.1.w3.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.2.w1.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.2.w2.weight": "model-00174-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.2.w3.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.3.w1.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.3.w2.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.3.w3.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.4.w1.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.4.w2.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.4.w3.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.5.w1.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.5.w2.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.5.w3.weight": "model-00175-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.6.w1.weight": "model-00176-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.6.w2.weight": "model-00176-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.6.w3.weight": "model-00176-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.7.w1.weight": "model-00176-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.7.w2.weight": "model-00176-of-00193.safetensors", + "model.layers.72.block_sparse_moe.experts.7.w3.weight": "model-00176-of-00193.safetensors", + "model.layers.72.block_sparse_moe.gate.weight": "model-00174-of-00193.safetensors", + "model.layers.72.input_layernorm.weight": "model-00176-of-00193.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00176-of-00193.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00174-of-00193.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00174-of-00193.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00174-of-00193.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00174-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.0.w1.weight": "model-00176-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.0.w2.weight": "model-00176-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.0.w3.weight": "model-00176-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.1.w1.weight": "model-00176-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.1.w2.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.1.w3.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.2.w1.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.2.w2.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.2.w3.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.3.w1.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.3.w2.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.3.w3.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.4.w1.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.4.w2.weight": "model-00177-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.4.w3.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.5.w1.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.5.w2.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.5.w3.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.6.w1.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.6.w2.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.6.w3.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.7.w1.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.7.w2.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.experts.7.w3.weight": "model-00178-of-00193.safetensors", + "model.layers.73.block_sparse_moe.gate.weight": "model-00176-of-00193.safetensors", + "model.layers.73.input_layernorm.weight": "model-00178-of-00193.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00178-of-00193.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00176-of-00193.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00176-of-00193.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00176-of-00193.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00176-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.0.w1.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.0.w2.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.0.w3.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.1.w1.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.1.w2.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.1.w3.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.2.w1.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.2.w2.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.2.w3.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.3.w1.weight": "model-00179-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.3.w2.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.3.w3.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.4.w1.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.4.w2.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.4.w3.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.5.w1.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.5.w2.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.5.w3.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.6.w1.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.6.w2.weight": "model-00180-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.6.w3.weight": "model-00181-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.7.w1.weight": "model-00181-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.7.w2.weight": "model-00181-of-00193.safetensors", + "model.layers.74.block_sparse_moe.experts.7.w3.weight": "model-00181-of-00193.safetensors", + "model.layers.74.block_sparse_moe.gate.weight": "model-00178-of-00193.safetensors", + "model.layers.74.input_layernorm.weight": "model-00181-of-00193.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00181-of-00193.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00178-of-00193.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00178-of-00193.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00178-of-00193.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00178-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.0.w1.weight": "model-00181-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.0.w2.weight": "model-00181-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.0.w3.weight": "model-00181-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.1.w1.weight": "model-00181-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.1.w2.weight": "model-00181-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.1.w3.weight": "model-00181-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.2.w1.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.2.w2.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.2.w3.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.3.w1.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.3.w2.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.3.w3.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.4.w1.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.4.w2.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.4.w3.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.5.w1.weight": "model-00182-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.5.w2.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.5.w3.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.6.w1.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.6.w2.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.6.w3.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.7.w1.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.7.w2.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.experts.7.w3.weight": "model-00183-of-00193.safetensors", + "model.layers.75.block_sparse_moe.gate.weight": "model-00181-of-00193.safetensors", + "model.layers.75.input_layernorm.weight": "model-00183-of-00193.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00183-of-00193.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00181-of-00193.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00181-of-00193.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00181-of-00193.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00181-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.0.w1.weight": "model-00183-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.0.w2.weight": "model-00183-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.0.w3.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.1.w1.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.1.w2.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.1.w3.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.2.w1.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.2.w2.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.2.w3.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.3.w1.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.3.w2.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.3.w3.weight": "model-00184-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.4.w1.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.4.w2.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.4.w3.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.5.w1.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.5.w2.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.5.w3.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.6.w1.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.6.w2.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.6.w3.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.7.w1.weight": "model-00185-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.7.w2.weight": "model-00186-of-00193.safetensors", + "model.layers.76.block_sparse_moe.experts.7.w3.weight": "model-00186-of-00193.safetensors", + "model.layers.76.block_sparse_moe.gate.weight": "model-00183-of-00193.safetensors", + "model.layers.76.input_layernorm.weight": "model-00186-of-00193.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00186-of-00193.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00183-of-00193.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00183-of-00193.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00183-of-00193.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00183-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.0.w1.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.0.w2.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.0.w3.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.1.w1.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.1.w2.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.1.w3.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.2.w1.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.2.w2.weight": "model-00186-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.2.w3.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.3.w1.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.3.w2.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.3.w3.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.4.w1.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.4.w2.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.4.w3.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.5.w1.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.5.w2.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.5.w3.weight": "model-00187-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.6.w1.weight": "model-00188-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.6.w2.weight": "model-00188-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.6.w3.weight": "model-00188-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.7.w1.weight": "model-00188-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.7.w2.weight": "model-00188-of-00193.safetensors", + "model.layers.77.block_sparse_moe.experts.7.w3.weight": "model-00188-of-00193.safetensors", + "model.layers.77.block_sparse_moe.gate.weight": "model-00186-of-00193.safetensors", + "model.layers.77.input_layernorm.weight": "model-00188-of-00193.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00188-of-00193.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00186-of-00193.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00186-of-00193.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00186-of-00193.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00186-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.0.w1.weight": "model-00188-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.0.w2.weight": "model-00188-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.0.w3.weight": "model-00188-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.1.w1.weight": "model-00188-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.1.w2.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.1.w3.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.2.w1.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.2.w2.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.2.w3.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.3.w1.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.3.w2.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.3.w3.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.4.w1.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.4.w2.weight": "model-00189-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.4.w3.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.5.w1.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.5.w2.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.5.w3.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.6.w1.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.6.w2.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.6.w3.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.7.w1.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.7.w2.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.experts.7.w3.weight": "model-00190-of-00193.safetensors", + "model.layers.78.block_sparse_moe.gate.weight": "model-00188-of-00193.safetensors", + "model.layers.78.input_layernorm.weight": "model-00190-of-00193.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00190-of-00193.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00188-of-00193.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00188-of-00193.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00188-of-00193.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00188-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.0.w1.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.0.w2.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.0.w3.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.1.w1.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.1.w2.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.1.w3.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.2.w1.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.2.w2.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.2.w3.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.3.w1.weight": "model-00191-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.3.w2.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.3.w3.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.4.w1.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.4.w2.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.4.w3.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.5.w1.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.5.w2.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.5.w3.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.6.w1.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.6.w2.weight": "model-00192-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.6.w3.weight": "model-00193-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.7.w1.weight": "model-00193-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.7.w2.weight": "model-00193-of-00193.safetensors", + "model.layers.79.block_sparse_moe.experts.7.w3.weight": "model-00193-of-00193.safetensors", + "model.layers.79.block_sparse_moe.gate.weight": "model-00190-of-00193.safetensors", + "model.layers.79.input_layernorm.weight": "model-00193-of-00193.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00193-of-00193.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00190-of-00193.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00190-of-00193.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00190-of-00193.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00190-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00193.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00020-of-00193.safetensors", + "model.layers.8.input_layernorm.weight": "model-00022-of-00193.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00022-of-00193.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00020-of-00193.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00020-of-00193.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00020-of-00193.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00020-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00193.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00193.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00022-of-00193.safetensors", + "model.layers.9.input_layernorm.weight": "model-00025-of-00193.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00025-of-00193.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00022-of-00193.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00022-of-00193.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00022-of-00193.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00022-of-00193.safetensors", + "model.norm.weight": "model-00193-of-00193.safetensors" + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..69016c8865cef891c0708d4734453821d9bba334 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277 +size 1831879 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..58b40b80ea960fdd3d52734465af72cea8291687 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,135 @@ +{ + "extra_ids": 0, + "do_lower_case": false, + "keep_accents": true, + "bos_token": "", + "eos_token": "", + "unk_token": "", + "pad_token": "", + "mask_token": "", + "cls_token": "", + "sep_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "special_tokens_map_file": null, + "tokenizer_class": "LlamaTokenizer", + "added_tokens_decoder": { + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "<|available_tools|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "<|tool_calls|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "<|tool_results|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "<|code|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "<|file|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102397": { + "content": "<|prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102398": { + "content": "<|suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102399": { + "content": "<|middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "add_prefix_space": false, + "add_dummy_prefix_space": false, + "legacy": false, + "add_bos_token": false, + "add_eos_token": true +} \ No newline at end of file