diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a79ed4f762a20277d72938d07bef81c0c4d36f3
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+ "architectures": [
+ "MixtralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 8192,
+ "initializer_range": 0.02,
+ "intermediate_size": 28672,
+ "max_position_embeddings": 8192,
+ "model_type": "mixtral",
+ "num_attention_heads": 64,
+ "num_experts_per_tok": 2,
+ "num_hidden_layers": 80,
+ "num_key_value_heads": 8,
+ "num_local_experts": 8,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000,
+ "router_aux_loss_coef": 0.001,
+ "router_jitter_noise": 0.0,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.40.1",
+ "use_cache": true,
+ "vocab_size": 102400
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..8a0505d8393c7cf690949e7d46272ba7f6f1b491
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.40.1"
+}
diff --git a/model-00001-of-00193.safetensors b/model-00001-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..81cef2a0e75314d474361f29e5d81d292551b99a
--- /dev/null
+++ b/model-00001-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee252ad16ab8c4435a2164d2b2bfe5cea977965f6f76dd23456d47b0c58f4ff4
+size 4798416360
diff --git a/model-00002-of-00193.safetensors b/model-00002-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..be929b5728d450990d804cf24cf8d23477a58d5a
--- /dev/null
+++ b/model-00002-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3333c630d3f5ac9010294ef4e92bc38412a331c0db15bc6f70a23a06e5bf731
+size 4697621824
diff --git a/model-00003-of-00193.safetensors b/model-00003-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc04b74011d80476c3bb7cddc05d6935cb79d0ee
--- /dev/null
+++ b/model-00003-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df83e85801809a6942e3e5b59cef2aaa5375b16c891d61e1c4e98db8ffb8866f
+size 4999776368
diff --git a/model-00004-of-00193.safetensors b/model-00004-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da0633ba2d901ef2b9e6f1230377da9ae805d8a3
--- /dev/null
+++ b/model-00004-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:189a741b81ee3acf943d6c183f90cfffd8e5709dcfe495206268858103ed022c
+size 4697621824
diff --git a/model-00005-of-00193.safetensors b/model-00005-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..478a6513eaa04d36810be8a0914cac7a05782f2c
--- /dev/null
+++ b/model-00005-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ae663921ef1122966cdf9aee71a333aeb205b9d9ea8fd13a4be9398bc1d7619
+size 4697621824
diff --git a/model-00006-of-00193.safetensors b/model-00006-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e883534dd8d03898f61089690ce16484e4ef38ff
--- /dev/null
+++ b/model-00006-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afe163192cf2d8f2a361e3d8bbadbfd5f718192a43b44db589d191814ec587bd
+size 4999776360
diff --git a/model-00007-of-00193.safetensors b/model-00007-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3a41698c2324afa9eb8403bd714daf44a61dc25e
--- /dev/null
+++ b/model-00007-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe5caa449b493e719280cbb958fea8235e2c878df9cd08649fc6cc593e1abc78
+size 4697621824
diff --git a/model-00008-of-00193.safetensors b/model-00008-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..237875ca978ef431b3b1ce7bd949326e030c4b06
--- /dev/null
+++ b/model-00008-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4cf92ee935581fe6cf30619f6eda144219e40800d1377c9abf44c08d3a59c5b
+size 4999776368
diff --git a/model-00009-of-00193.safetensors b/model-00009-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..57dc9884a1c04847ed3807e680d3d1b388d3bee8
--- /dev/null
+++ b/model-00009-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc72e39d1fbd62196cc578319d6aa84ce38f67530900ce26732db5ef86cad86b
+size 4697621824
diff --git a/model-00010-of-00193.safetensors b/model-00010-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..525594584becfa6325f18b8287e6e77b6994defb
--- /dev/null
+++ b/model-00010-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e68bd35227d019a3223a9b793b1a9d731b463034de068aa31f7e4f7313bbb11b
+size 4999776368
diff --git a/model-00011-of-00193.safetensors b/model-00011-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1738e748817bb3d47910e1367a6f030134c27c9b
--- /dev/null
+++ b/model-00011-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a022b272f9d49183fe6d795ad50811dc401e29a79f0a74f22bb6d5df2ea8ef0
+size 4697621824
diff --git a/model-00012-of-00193.safetensors b/model-00012-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..059a8e26bc7ccad2e5bd92d5a50fddd9fafc5f7f
--- /dev/null
+++ b/model-00012-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef2dbcb9712a8df369fa3c813dd1054bae9011569a029ae80fe1083c35933a59
+size 4697621824
diff --git a/model-00013-of-00193.safetensors b/model-00013-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..24271b451633c04c77bac1d7db53154231cb5051
--- /dev/null
+++ b/model-00013-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c7b350c3263c954ef4ec7e0070441a36637463153135cab60f80f4f7872f741
+size 4999776368
diff --git a/model-00014-of-00193.safetensors b/model-00014-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..135d33d3f1b74e2c5978761bb8b3a8c73bd343ce
--- /dev/null
+++ b/model-00014-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8deee86a710b0cfd5e7c67324f18595e91909dad542d4d14db30ce2101e5b081
+size 4697621824
diff --git a/model-00015-of-00193.safetensors b/model-00015-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fc47ed178a3e9db2fbe884078cfa8d81340f59e0
--- /dev/null
+++ b/model-00015-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e7d0278748ed7940a7b14dd36d3a80ad19c35785f871e50183c7733a948a724
+size 4999776368
diff --git a/model-00016-of-00193.safetensors b/model-00016-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dcf90615a6663196ff46f69f81ae73bc1f9fd4c5
--- /dev/null
+++ b/model-00016-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07976079b71f53a3a3eaacfa1e240d7d3a8aabf9754276ea79417bdea7817cee
+size 4697621824
diff --git a/model-00017-of-00193.safetensors b/model-00017-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..eb95a0b07a2cbbf36ed57c09f87233939c171096
--- /dev/null
+++ b/model-00017-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93302b80a1d80c14ae25cb5f27bece312611b3bf9809ab8a24bdaecbde644b91
+size 4697621824
diff --git a/model-00018-of-00193.safetensors b/model-00018-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..99ef072a3709b1b14eb67dde030e627885be4405
--- /dev/null
+++ b/model-00018-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc0c919af75445055948abb1a445581ed43f1e2afe90f36d4fdef6c448ee107d
+size 4999776360
diff --git a/model-00019-of-00193.safetensors b/model-00019-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2b8498d23cbace7321de84060faa734321f1a47a
--- /dev/null
+++ b/model-00019-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dbafdd88bcd8282c56994af921a80894b0bb487aebc46cff149bcb23e47b4285
+size 4697621824
diff --git a/model-00020-of-00193.safetensors b/model-00020-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b02c883644373cb94d80322272053a38f70a51d
--- /dev/null
+++ b/model-00020-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:596c0ffd061a09655ae3eba5f16f65310e803c0880a07e522965d6d99421f6a6
+size 4999776368
diff --git a/model-00021-of-00193.safetensors b/model-00021-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9556c0cb495a8df498868016715bb28ba23e972f
--- /dev/null
+++ b/model-00021-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bacbf6e443f07d1efb53df4638f5af90304175bac1f302479bf021d428e81148
+size 4697621824
diff --git a/model-00022-of-00193.safetensors b/model-00022-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f4852f14912844e86d602ecd9f86641cb19e14cc
--- /dev/null
+++ b/model-00022-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4a9395833ed6359da4aeed7cd08ed90689e9fc3300fc402f75ed2ec1cda0593
+size 4999776368
diff --git a/model-00023-of-00193.safetensors b/model-00023-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f93ebf6658aa365208581d666085dd2c7bf739ba
--- /dev/null
+++ b/model-00023-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d90f2616c0f99d6a56ccbdad6a7630b7e91109f720b53415580c5de384a0e736
+size 4697621824
diff --git a/model-00024-of-00193.safetensors b/model-00024-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8a89892c775475331932fab8b4381c23b6072d02
--- /dev/null
+++ b/model-00024-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a39870099876fd74ec68602ea661ce39f66e9041885258ea11be112966fb7e26
+size 4697621824
diff --git a/model-00025-of-00193.safetensors b/model-00025-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c0561c798d9f8a6f0ad0a430df9e40642deb380e
--- /dev/null
+++ b/model-00025-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e5836e49e659131cdf87fee401fbe51cfff97b71d286b742c7e1fbceff55eb0
+size 4999776376
diff --git a/model-00026-of-00193.safetensors b/model-00026-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d836472c272df4a92514a6417c58aac986090a9e
--- /dev/null
+++ b/model-00026-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c507aa31e9b6ee798c09e947f3f5089eb571a6b605e5a39aace12351fda4c9d3
+size 4697621832
diff --git a/model-00027-of-00193.safetensors b/model-00027-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7d294deb8668765baa5ec448fb6161710f7b2c73
--- /dev/null
+++ b/model-00027-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b4446513879f1b726c9559010aa6058b817ace54d9eb46e5de0facef2626581
+size 4999776384
diff --git a/model-00028-of-00193.safetensors b/model-00028-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..be340eefc3edfb609b763875a5462257bd1d44a4
--- /dev/null
+++ b/model-00028-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea1a71f2d2bd15cff751bb66a3c8d27749f3704e40b2208474da2176006227bf
+size 4697621832
diff --git a/model-00029-of-00193.safetensors b/model-00029-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7e9f6261b265383d83bfe4dc6e6e2b07c9bc0e77
--- /dev/null
+++ b/model-00029-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b4abab2cdce9bb38eb89ff80e795911f4f71813bb464df85c8209325ff1c46d
+size 4697621832
diff --git a/model-00030-of-00193.safetensors b/model-00030-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1c37994f6bc4cf5fbc93ab6de68174313b40167f
--- /dev/null
+++ b/model-00030-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29db0d27978a265bc80e07b6bd4f2efb4f8c01461c60d99f8381067a8dd50909
+size 4999776376
diff --git a/model-00031-of-00193.safetensors b/model-00031-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a4dc4495a798036181c455707817fa6a34091b53
--- /dev/null
+++ b/model-00031-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02462b1df37884f2986c603c332361121d372119ce839c6098990cab53f15eb5
+size 4697621832
diff --git a/model-00032-of-00193.safetensors b/model-00032-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..044453d1e615a77ac8625b15ddcefed5bd439bcc
--- /dev/null
+++ b/model-00032-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7c4eeedb92e550923996d1ebb1b46b588bf4be945e4cf9497bc523d66bde4b4f
+size 4999776384
diff --git a/model-00033-of-00193.safetensors b/model-00033-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9e179b1ca77ec53abb62acb74c77dabe9b39504e
--- /dev/null
+++ b/model-00033-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a99d2be426eebfef1b4678a4377176ac842cba6ef081d3748634b8600db8f67
+size 4697621832
diff --git a/model-00034-of-00193.safetensors b/model-00034-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b4b77ec1f501f3873dbce69250a7d3d2abf29814
--- /dev/null
+++ b/model-00034-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa7bce8e7c07c333f493acc7e1b33b50f9cf844ef068214c9a6e6825248fec0e
+size 4999776384
diff --git a/model-00035-of-00193.safetensors b/model-00035-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c3d33cc65b57f6c3edf2b87d9c6918c4fe010b5d
--- /dev/null
+++ b/model-00035-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd008924df9e8454526eb8182db193823f5ab9bcede55cecc6edc5a0bbf3d4cc
+size 4697621832
diff --git a/model-00036-of-00193.safetensors b/model-00036-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..996b13f39aad780151fcbef344112a8a101648a1
--- /dev/null
+++ b/model-00036-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85cfc4e8abe8f331fffd9a14f36fe7ad947745478d246740c689a237f7612eaa
+size 4697621832
diff --git a/model-00037-of-00193.safetensors b/model-00037-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a458ed77c4c609bfc4fbbe945bf23eead59773af
--- /dev/null
+++ b/model-00037-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:081ad372bc9087d33be2e944e567534ee6cb1f08c25d263ff7a53c1b7ca1825e
+size 4999776384
diff --git a/model-00038-of-00193.safetensors b/model-00038-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2ae0c4a8e21ed667b91b3ed92baf37d75f9c09d2
--- /dev/null
+++ b/model-00038-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e5ad5f72e6aebbdd82800a207516f877027991181baa9fc2c475b5564db5976
+size 4697621832
diff --git a/model-00039-of-00193.safetensors b/model-00039-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ec77d1cf723927c12f2e9d8d92432704765aecca
--- /dev/null
+++ b/model-00039-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:92da26032ce48bad788f02fe0e9068f877b0f1a221da2bd5c7ff11beb36cf19b
+size 4999776384
diff --git a/model-00040-of-00193.safetensors b/model-00040-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d07be6d281eea3138a0f5580caff0bc9c643b82c
--- /dev/null
+++ b/model-00040-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f79012a6a0c9c5c354abce055b8b6de17d03fa37d75f051f897595c5f87312dc
+size 4697621832
diff --git a/model-00041-of-00193.safetensors b/model-00041-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..34853ce1a02308424f03354ad4e78f7431961eb1
--- /dev/null
+++ b/model-00041-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:992e9afc1d8de96fb2707984a3a71989c1db8c7d99cf8f104581604abf802d1d
+size 4697621832
diff --git a/model-00042-of-00193.safetensors b/model-00042-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8818c57a88aadece9812715f33dd1f101a80c1dd
--- /dev/null
+++ b/model-00042-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff6acab943a6495eed7d5dc05185ae861d61c3b9480d9ea69152da1d0f9ea40c
+size 4999776376
diff --git a/model-00043-of-00193.safetensors b/model-00043-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..832e56d1f3673bfe1c458265c639c6f5de17b49e
--- /dev/null
+++ b/model-00043-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1783274dc9898c35fcd648e89ed451a9b7cd748248763b182bbe5135e1e3f075
+size 4697621832
diff --git a/model-00044-of-00193.safetensors b/model-00044-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc30e47e2f544025a85681b9d694a4e88020057a
--- /dev/null
+++ b/model-00044-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f958646f8a171661301c52e669585ffc9b51b965f9e632395180ff91a53684c
+size 4999776384
diff --git a/model-00045-of-00193.safetensors b/model-00045-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f8036648344054e612dc0ed874c58bae1cc3aad
--- /dev/null
+++ b/model-00045-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb928574f7cc6e8917af69fdcd87c34dab1a5821764876c40e906e1c61de4121
+size 4697621832
diff --git a/model-00046-of-00193.safetensors b/model-00046-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..00bece69cf76d7bd6392c62dbf45044a00307117
--- /dev/null
+++ b/model-00046-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:547ee660c6f6370fce2381d7269d68885bd6907002c33984bf6a994f47fcd33d
+size 4999776384
diff --git a/model-00047-of-00193.safetensors b/model-00047-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..446c5b5289868ed49e1f1319e235c17a9aac0d54
--- /dev/null
+++ b/model-00047-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0cf7b06048654d0957d473678744765b4bc4845687565b2a080e21259582238
+size 4697621832
diff --git a/model-00048-of-00193.safetensors b/model-00048-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..591c8ad7c922fb5f1da50f5a3cdf8cf74ef8ca19
--- /dev/null
+++ b/model-00048-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6277ea1fa27e3af9ba567f9e7a18300cdb685ec1ae90c09b505662fc3b8f1cf2
+size 4697621832
diff --git a/model-00049-of-00193.safetensors b/model-00049-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5ba3070c26ee84a0d526cc87ab6493b2cd99e8ef
--- /dev/null
+++ b/model-00049-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6c4bc9de1ef11f92db4f2e1fb88813b85da6f44694e3ee0a3597061e01c8f62
+size 4999776384
diff --git a/model-00050-of-00193.safetensors b/model-00050-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0bac7ca4ca119402c1d05645e104ceb9b6d79a2f
--- /dev/null
+++ b/model-00050-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:657e0aa84f7db4167d41822eb25b4722121e9bdaf421e0694453ba4c41de70ee
+size 4697621832
diff --git a/model-00051-of-00193.safetensors b/model-00051-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1c1cde92c43e3530395c55999a6057b45b1edd0c
--- /dev/null
+++ b/model-00051-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5ebe177acc5eccefefef7065c170a9c37e83c8382ee0f6c7ccbcd104a930532
+size 4999776384
diff --git a/model-00052-of-00193.safetensors b/model-00052-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4ea2d844ebc65265f0df148454f782ea0c74adbb
--- /dev/null
+++ b/model-00052-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc598cf279567ed9cf7feb1bd5d2cb52f52c6ecb58a49746932fdcf44ae2b7b3
+size 4697621832
diff --git a/model-00053-of-00193.safetensors b/model-00053-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bce4f42bbc6d907b36cf43564672db1461ce142e
--- /dev/null
+++ b/model-00053-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3988a3c17d7ba96b25dd9a6dec7660713736585f3c16c8b8b9424695c74651ea
+size 4697621832
diff --git a/model-00054-of-00193.safetensors b/model-00054-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..286c233597eaef6adb150d02fcef2417eae936e8
--- /dev/null
+++ b/model-00054-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad76982f5745a44cb9c455e03dd21ba1f286f6ea8a1fc836f59b6255c2ac2497
+size 4999776376
diff --git a/model-00055-of-00193.safetensors b/model-00055-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dcc2466ed3cfbae5e714d7d27f5043075b1f98d9
--- /dev/null
+++ b/model-00055-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:163f9e4332d828c8045450ad8181eac967b8d38df0b39ea2d75c06e4ec9f08bd
+size 4697621832
diff --git a/model-00056-of-00193.safetensors b/model-00056-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f8017bc34078d32a57f0981659ac62c3758fc1e0
--- /dev/null
+++ b/model-00056-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df4821e30a6784c8bf9bc73ed070625fe055396eb9df78aa61621e72da9e4a4b
+size 4999776384
diff --git a/model-00057-of-00193.safetensors b/model-00057-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4567e1f03b2c646a19c5cbb5feac98612828bcf
--- /dev/null
+++ b/model-00057-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6965e7d8030d3de060037bdc8adbeac315dea541119f2659e61610fc8311f1e9
+size 4697621832
diff --git a/model-00058-of-00193.safetensors b/model-00058-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e700edc30fa2f6f627fa9e027365cb671208e1b8
--- /dev/null
+++ b/model-00058-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:170f53b4a8d18d1dd9ba2230ce72d37beed5ce0c24bca37b9258a994e68ddb9e
+size 4999776384
diff --git a/model-00059-of-00193.safetensors b/model-00059-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..285d43ce9c9a6f4a1abe371736da5bf21e36caba
--- /dev/null
+++ b/model-00059-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9b7826842de6a2d7d1dca0a90c8e1bb96ae4b9d1bca8c632685e08583a13b94
+size 4697621832
diff --git a/model-00060-of-00193.safetensors b/model-00060-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c46d7850021c77d5da0265ad512e9a98ce41426b
--- /dev/null
+++ b/model-00060-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed9e94294298f048080ca9c8f5059331f5d7771c5a3d9aa1137677d544fbf4a4
+size 4697621832
diff --git a/model-00061-of-00193.safetensors b/model-00061-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..972395da4894fd5b0a5abe3d12f01e67c79ec3c6
--- /dev/null
+++ b/model-00061-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9b02ede041b1076b795ad7f4a91e72eaa569072d29dfa48bfd0b04f388a588d
+size 4999776384
diff --git a/model-00062-of-00193.safetensors b/model-00062-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4eb5c316357085cea5142af1cc865e62e466601f
--- /dev/null
+++ b/model-00062-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0412fc0fec19d3bf520513b2404a7a0fdd0a58d03e861d9553ff4a8e2b562a2c
+size 4697621832
diff --git a/model-00063-of-00193.safetensors b/model-00063-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..213f158dce1f61ab10ca2676267893f2a83cff6e
--- /dev/null
+++ b/model-00063-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c98f00dabc9e20f48070eb556725f771b79d1e2b4fa526035dfea84cdd62f296
+size 4999776384
diff --git a/model-00064-of-00193.safetensors b/model-00064-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f996d96076fc06a0896c74ced37cbb6106ce073
--- /dev/null
+++ b/model-00064-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af919763df7d570ca4f21b7f3eb35cb82dc88b05cff1a08fab1f78fcfd4ab478
+size 4697621832
diff --git a/model-00065-of-00193.safetensors b/model-00065-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..92f7a13600be0362deec058b59a25c3e055b05a6
--- /dev/null
+++ b/model-00065-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e659e1ad1ef371237c0ce046773abfb9d3e5a4d89132b7b628ffcec570a952e
+size 4697621832
diff --git a/model-00066-of-00193.safetensors b/model-00066-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..37ed23a811b99ccc6db0cecd6c3ee5ce2f930e2e
--- /dev/null
+++ b/model-00066-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3b7c581ca71cd37540537195c8b360286b0a48dbe9cac522ae8921ca9d6ae8b
+size 4999776376
diff --git a/model-00067-of-00193.safetensors b/model-00067-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a270c3bda72925eaedf744c535ca5416dcac378b
--- /dev/null
+++ b/model-00067-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f49660e4522a5c399ff73d4ca511ca6f59757028867baad24e1f239a35e39ec1
+size 4697621832
diff --git a/model-00068-of-00193.safetensors b/model-00068-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b848ef9a8d41177fd75bd5e46d03d09a7d863f1
--- /dev/null
+++ b/model-00068-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad61ae1e69e49dbd155b07bd490eaa34a5f165d0f4c1f5153b8635a56dfbc93e
+size 4999776384
diff --git a/model-00069-of-00193.safetensors b/model-00069-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0a295873e34b744a399f6901bb4fb59b18b63c0a
--- /dev/null
+++ b/model-00069-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c2db9bf9ba4bcebd6ff3c5f9e3a6a2dc59bb7d1cf16838be5963bc20876649c
+size 4697621832
diff --git a/model-00070-of-00193.safetensors b/model-00070-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f3ef26b8d3a039c026314eb3c15f18f4c3e7a8fa
--- /dev/null
+++ b/model-00070-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a3e9476a25a2af266a07c8c9a471bd6caf45beb9b211a07361716aef53d3b56e
+size 4999776384
diff --git a/model-00071-of-00193.safetensors b/model-00071-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ed19ecd0a33ecbf9eed789b2b3823eace270621a
--- /dev/null
+++ b/model-00071-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4734cbde2d5778714cafd9845a73db88738c79b801a9d5e4da408f93d38ebbd
+size 4697621832
diff --git a/model-00072-of-00193.safetensors b/model-00072-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4c390b3e6eba3e21f4c02622843b14c37814ed01
--- /dev/null
+++ b/model-00072-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d33ee321f8e2a2655b8977463233cfd708fe74ec94804c970cfb7509aa75c94
+size 4697621832
diff --git a/model-00073-of-00193.safetensors b/model-00073-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..097bc8815e3fe1fe5b5a3e064115660e27256aac
--- /dev/null
+++ b/model-00073-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2cf09f145cd0a016bba13bce5707f59c67e6895d06e45fc1016c2fc96ac3750d
+size 4999776384
diff --git a/model-00074-of-00193.safetensors b/model-00074-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49014c9a13111339c597c6039a71be3a1e746183
--- /dev/null
+++ b/model-00074-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0270524dac343c41c14f5e3f4b87a4a9ab1eb56958075b360eb47e62d4ec21b
+size 4697621832
diff --git a/model-00075-of-00193.safetensors b/model-00075-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5c62544d735d00fb1aa05e6396ae0c339be52ade
--- /dev/null
+++ b/model-00075-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c8d078ea3ef7b17c26495ac4999d3fd1410aab1ca72bc19de9044d77066354d
+size 4999776384
diff --git a/model-00076-of-00193.safetensors b/model-00076-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fffa6931f547b6f9bc3803d094bb5269519c9c89
--- /dev/null
+++ b/model-00076-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf6abda95d1783b0d203f79245b070aecb024b22be9c49cf03f1351f701bc399
+size 4697621832
diff --git a/model-00077-of-00193.safetensors b/model-00077-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1ca140cf4c104c14abe1b4a390ac832cb379f660
--- /dev/null
+++ b/model-00077-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41b6486e7c1ee510997a7a2265767a9bf7c817cd94bfaee8b92c55cd4b116f1e
+size 4697621832
diff --git a/model-00078-of-00193.safetensors b/model-00078-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..14f1378ebdfd1ab30b26f72d8dfe9e05f0f841e4
--- /dev/null
+++ b/model-00078-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5175d83710777be84ae0d338c7e63c2063471b3a203eeb6e0e636ed0775719f2
+size 4999776376
diff --git a/model-00079-of-00193.safetensors b/model-00079-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..150dcf29074ecfaae59b4f9b5df3b67d63cc2f92
--- /dev/null
+++ b/model-00079-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05db07c3b6227757a514466c06ebd0ac6f15c1e2604f64869102c26bc8943160
+size 4697621832
diff --git a/model-00080-of-00193.safetensors b/model-00080-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e0a780b47b3a2c3aea912f7bd50744263c02552f
--- /dev/null
+++ b/model-00080-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72c98577b9c573a5d54fbdc54c6e7553e351c2de552d5cf2b2895573eb33915a
+size 4999776384
diff --git a/model-00081-of-00193.safetensors b/model-00081-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ad31169d54a65efd2e95f856006bf3ca1422482e
--- /dev/null
+++ b/model-00081-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d5f523801b157da7589695767a5315119036f5d477c60c9ab2be6cc89fe70ff
+size 4697621832
diff --git a/model-00082-of-00193.safetensors b/model-00082-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4bf530187d30e2f6d2c5549eeed04b863cadefe3
--- /dev/null
+++ b/model-00082-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d148581040e9fc880ec9315d050132581fe5fd05ad41204494ec2cffc31e0442
+size 4999776384
diff --git a/model-00083-of-00193.safetensors b/model-00083-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6973320623afeeddfa97418a75ddb4eeeeef962b
--- /dev/null
+++ b/model-00083-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2be61f137a6ef414b4a733e6e5fd4052680ea6987119c084f5e7030e6674dd92
+size 4697621832
diff --git a/model-00084-of-00193.safetensors b/model-00084-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..45b3e2c38c5a20d919a53b06c27ac58fc8541627
--- /dev/null
+++ b/model-00084-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b23012fdf19e7a5dc8a1e90e1cc0cfe870509e29909c237a83c959996b9cb840
+size 4697621832
diff --git a/model-00085-of-00193.safetensors b/model-00085-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bb8c2ca86411f458f30d67380d23af45267d8ec3
--- /dev/null
+++ b/model-00085-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:752440ca3fa8f34a65175953dce903cc54f657d76a97ae97b1aa7b4c03cfed1e
+size 4999776384
diff --git a/model-00086-of-00193.safetensors b/model-00086-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..287f766b3190b5e9064a477862ade77f69c0c7c3
--- /dev/null
+++ b/model-00086-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c07ca0bc5efdb4d09b8bdfb20627d948a4e361323838156833571939eb8ca21
+size 4697621832
diff --git a/model-00087-of-00193.safetensors b/model-00087-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b38cba0d1d59611b24721bb05d87c4c76a2ab2d1
--- /dev/null
+++ b/model-00087-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0250d5313b6bb9c799f88a7e0a08ec98a2fba1a9f9475a5b616c7411e2a9138
+size 4999776384
diff --git a/model-00088-of-00193.safetensors b/model-00088-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..237309fcfcf0187edb72658238476106e5dadec8
--- /dev/null
+++ b/model-00088-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2594f36b24b62549a6a3eb89bc9d0442ef6f837810b770c5638b4b4693421fb4
+size 4697621832
diff --git a/model-00089-of-00193.safetensors b/model-00089-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e59071cf6a5ab0b4e380e38f2ae5e2ea2a3c92f6
--- /dev/null
+++ b/model-00089-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9247cb97499ffb358923dbb731d14745aa840a637a79e106abab24c184bc2ece
+size 4697621832
diff --git a/model-00090-of-00193.safetensors b/model-00090-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6bc7e802d568c4dc7b2ac495c34f360c78132a4c
--- /dev/null
+++ b/model-00090-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f9d8045b15aae64e759d706cb5f3bbbe5dbd6ca1891a80dd32d2e5c8098a220
+size 4999776376
diff --git a/model-00091-of-00193.safetensors b/model-00091-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cfbae1b5b18b0275414eadc6977ee1e8c69dee32
--- /dev/null
+++ b/model-00091-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc77898fc03d4259846ce302329118f429e9e317db7148ad17929ea3b011dfb2
+size 4697621832
diff --git a/model-00092-of-00193.safetensors b/model-00092-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..81cb84e56afd75bcd57a79062f1fda959e72218d
--- /dev/null
+++ b/model-00092-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46f0969c7d0c01ed055e0db8df7488e61f0f264953079763f5563f5ab48f4675
+size 4999776384
diff --git a/model-00093-of-00193.safetensors b/model-00093-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c8d83f3431b5dd23733433f5892fbffca3bd630
--- /dev/null
+++ b/model-00093-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6f5fd902980d9512e596803e9f5d48381cf1cdcecb280fac55fbac08749ccc1
+size 4697621832
diff --git a/model-00094-of-00193.safetensors b/model-00094-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..90ee6c303aabdc3123db2891ac32b63b4e0b46da
--- /dev/null
+++ b/model-00094-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da1ebd8b5da6265b86675bf73e952f559844adcc6ca8270ba8a96ba39557d806
+size 4999776384
diff --git a/model-00095-of-00193.safetensors b/model-00095-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e0b09c4be0fd3275064fffe5224060f96d62f506
--- /dev/null
+++ b/model-00095-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b42db38327fb1de230626b4e9982f49b6e6bcb43dc6f2c57a468a13505b91b4
+size 4697621832
diff --git a/model-00096-of-00193.safetensors b/model-00096-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..61b1b66e8be86e87064129a363bd129f7ab7c455
--- /dev/null
+++ b/model-00096-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26b93a9adb72f73c9a0a02bd623fad2b5eaa5779310e393c5d004a135574bb57
+size 4697621832
diff --git a/model-00097-of-00193.safetensors b/model-00097-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..637510f1ff7c0c8238383a76a2977f7e5bac3499
--- /dev/null
+++ b/model-00097-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edc30bc5f846870a3baf7cc256f098670ed80283810347054900399955297308
+size 4999776384
diff --git a/model-00098-of-00193.safetensors b/model-00098-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..48fd6f3b200cc4067d23f1db644dcc29842a5caa
--- /dev/null
+++ b/model-00098-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c0aaf6ba4c285509011f2ab8c521166caf1cdb14dc12a2098ab2e8bb35f8973
+size 4697621832
diff --git a/model-00099-of-00193.safetensors b/model-00099-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..845af006aac42cb8651acb1972ab7173dde6c245
--- /dev/null
+++ b/model-00099-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a46f55931ade46bb5374eb824bd8e7604c4d28cf596c81190bf21a4abe8cf9af
+size 4999776384
diff --git a/model-00100-of-00193.safetensors b/model-00100-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..338d800afa8391419b348c9c19dd0dab2be08dda
--- /dev/null
+++ b/model-00100-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c971cbe3e14fe922d00d54257f14b7bb40e910799b6897f561e8e5efcba201bf
+size 4697621832
diff --git a/model-00101-of-00193.safetensors b/model-00101-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ba8c9ff7c44ceec7762b78302b7c7b1435976e6e
--- /dev/null
+++ b/model-00101-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7c762570c44fca18dce2ff73f58f3a801b94e894831a82049491ee3d3a38409
+size 4697621832
diff --git a/model-00102-of-00193.safetensors b/model-00102-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa15f400ab11a4b37c550ced474df1252c05fba5
--- /dev/null
+++ b/model-00102-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c5aee38b60568c2edc442c29c64df21cd49dca8829fc07dd6d5390538dea364
+size 4999776376
diff --git a/model-00103-of-00193.safetensors b/model-00103-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d34085ea154ac4c35162e9bcd5cbc21af2ecccfd
--- /dev/null
+++ b/model-00103-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:97a2cf17388b52149e0a597802dada04150a6c64e63a20027141b19f53bc8003
+size 4697621832
diff --git a/model-00104-of-00193.safetensors b/model-00104-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..49605193e9609749da7140f5a200ea7ac3f68e1f
--- /dev/null
+++ b/model-00104-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4d961d5feb7c81536865c7f43e466b7507a2dc00c3379edf3926630503e9475
+size 4999776384
diff --git a/model-00105-of-00193.safetensors b/model-00105-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ed1831da3cc23608e8d6c952ea295ee2ebf9cb9
--- /dev/null
+++ b/model-00105-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e076743f44a25ff96710d840563c4086dbf73d270efd65c28e144b8002af77e6
+size 4697621832
diff --git a/model-00106-of-00193.safetensors b/model-00106-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..70a1b187ab379b0a20f0db5f43a7410817a87a1c
--- /dev/null
+++ b/model-00106-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:881827012871c670451912f7ec4e69ea2dda997f2c213656eb7eefbc06ed7a5d
+size 4999776384
diff --git a/model-00107-of-00193.safetensors b/model-00107-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..adab7183a5bb441ce55cc7714083f4fe7590e890
--- /dev/null
+++ b/model-00107-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:887adb9e8183c8200f5033638aeb8999d1fa1fdeeed7344dcfcac08eabd952d7
+size 4697621832
diff --git a/model-00108-of-00193.safetensors b/model-00108-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0865c3f26ea2a86b828824cd044380555e0424fb
--- /dev/null
+++ b/model-00108-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:119843f8f492c62f6c8b905b7e652fce019465c314e5c9c67c59d25d8b533755
+size 4697621832
diff --git a/model-00109-of-00193.safetensors b/model-00109-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f2426d449ae42c970ac2205d97aa00cc41990b97
--- /dev/null
+++ b/model-00109-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91991bc319a83d11519d7399728534cf9387748949f2e013e0aa12f6e3af8149
+size 4999776384
diff --git a/model-00110-of-00193.safetensors b/model-00110-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d23f3f4ef1d9bca35631adb32756c456477d51c4
--- /dev/null
+++ b/model-00110-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd8d316270880d85f8400d725ec5299619ec544709756f9e32e907eef06f0c4a
+size 4697621832
diff --git a/model-00111-of-00193.safetensors b/model-00111-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e6313948f6d04756a66152ebf487b63288586334
--- /dev/null
+++ b/model-00111-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10d20920b9925da46a1c7eb5f8a43b78a45364c03bd9b79909d89dbd2e62ec6f
+size 4999776384
diff --git a/model-00112-of-00193.safetensors b/model-00112-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ad7a103bb365032d41a56f2e53e27adcceb93a7f
--- /dev/null
+++ b/model-00112-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69da9d5c24a23feb57531b14e5633ab6b47aa158397cdb2fc24404269a565f9d
+size 4697621832
diff --git a/model-00113-of-00193.safetensors b/model-00113-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..db6cd50e298d5c1642edb5653e42ed255881a34d
--- /dev/null
+++ b/model-00113-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dde909281f8a8556102e2fa91cc1c1c759259bdbfaae7832e58e75260d4f2f62
+size 4697621832
diff --git a/model-00114-of-00193.safetensors b/model-00114-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..763840e9b8e3326f0f22da676dd73426a69da9ca
--- /dev/null
+++ b/model-00114-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7f866d46b52655082d0528d07c4de5ff2dc0d89c8ce909bee032fc0f5ee8165c
+size 4999776376
diff --git a/model-00115-of-00193.safetensors b/model-00115-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..efdc1fa9445f8ce22b88a89a7797e2fb46e09d7d
--- /dev/null
+++ b/model-00115-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a6504a0528259fa4ba0c696da72125a040f59bf3d228e8741f71053a6847c92
+size 4697621832
diff --git a/model-00116-of-00193.safetensors b/model-00116-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2121addaeee61632d8a6958a53a9cf04faf5cff0
--- /dev/null
+++ b/model-00116-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b038e1db995d5055634a548a193f41b74b34c49e08bb4d5725f412f48e407cf4
+size 4999776384
diff --git a/model-00117-of-00193.safetensors b/model-00117-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9d7f3396a78bc9a8ac45ed8b6d85d988451e153d
--- /dev/null
+++ b/model-00117-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ceefcae314c8bceb4b0d56e5deb31dabaa079215ddbc648326ca2e81c8750b6
+size 4697621832
diff --git a/model-00118-of-00193.safetensors b/model-00118-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..98bcd590c1133a6f493303df846f1181f96316d7
--- /dev/null
+++ b/model-00118-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a1e7adedddd741dbcdab73d579b1a475474b5b0e53817453f7cf6bdc00e93d3
+size 4999776384
diff --git a/model-00119-of-00193.safetensors b/model-00119-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..06898652bfd41bc3790e2e04a9b7cf131506e9f2
--- /dev/null
+++ b/model-00119-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5047ad5b186ce3b4ff89a5b50e95661a4f9ac4cc6304f324fb4c68e10f299a29
+size 4697621832
diff --git a/model-00120-of-00193.safetensors b/model-00120-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bbdc2ad3b5d6b6ecf1624f6680ccb1b0b43523d4
--- /dev/null
+++ b/model-00120-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5928e46d02f4b0a0f37afdeddbd1f9745faebbfab16c6123b581699f47883849
+size 4697621832
diff --git a/model-00121-of-00193.safetensors b/model-00121-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0d4b6a7c7265af902b099c4c8b177c051ab2c83d
--- /dev/null
+++ b/model-00121-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07f2d3d862bbf55ab80f9b79b2e63af6516b7a13d45df56e30bdb9f7b868edbe
+size 4999776384
diff --git a/model-00122-of-00193.safetensors b/model-00122-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..87d80a9d766f6165b194abd0d0637be974bffc98
--- /dev/null
+++ b/model-00122-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38f75432f135f1662ffd330eadd67ce0789fd76237468dffc6a29aed845353a9
+size 4697621832
diff --git a/model-00123-of-00193.safetensors b/model-00123-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1e83ad94129a8befd8d4aeeaa0a5b12c33dbe5bc
--- /dev/null
+++ b/model-00123-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5a3284bdb5f7c9e5208341867125b116d2749fa57d8771cae0a11ffac1913606
+size 4999776384
diff --git a/model-00124-of-00193.safetensors b/model-00124-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fae2a785ee0a736f4cef2dde69ee83df3a31a8cd
--- /dev/null
+++ b/model-00124-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3a51d5b994357118a00e3274950b327de40be0e5da42befa44bb9115bb9701e
+size 4697621832
diff --git a/model-00125-of-00193.safetensors b/model-00125-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b57d10ec288c2ff15ef31f4779307a61e81b9035
--- /dev/null
+++ b/model-00125-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:22d1da9732ee78c7b5d6917dbeacf4f7025295905a18c8f1c11420fc210f8eee
+size 4697621832
diff --git a/model-00126-of-00193.safetensors b/model-00126-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f4ea2d9063c79a0452136da8655c2547483d704a
--- /dev/null
+++ b/model-00126-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7eeba3eb6cd46054661baa11d0a6b3f0d99f7bcb6d5ded30e3de4c507626b482
+size 4999776376
diff --git a/model-00127-of-00193.safetensors b/model-00127-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d6ea121a5d5a4c5a8c18266e372130c254c4ce94
--- /dev/null
+++ b/model-00127-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecf1ebacf941b30a43d46763d92a162de40d9a9a99bb3d6fc13e04c79e901698
+size 4697621832
diff --git a/model-00128-of-00193.safetensors b/model-00128-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9d030c30e229ee1362d31f6a299d9e91c6dcf71
--- /dev/null
+++ b/model-00128-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f761766f72ddb53ad000e44333e1c4be0bd64035dcfb674857da8e2e0fc6f320
+size 4999776384
diff --git a/model-00129-of-00193.safetensors b/model-00129-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fe81c6533a0d991c993a37de6e9803db643147ad
--- /dev/null
+++ b/model-00129-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:76b218e1211b0c27f7cc659222a2492d505f9d5ee11e7698fe3997cf85748939
+size 4697621832
diff --git a/model-00130-of-00193.safetensors b/model-00130-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7509f6dbe9424d87b168639a88edf6284c7e615c
--- /dev/null
+++ b/model-00130-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c409754bfb394dee252df90ecf85fcd196ee7880a24b88ab403c5baab14a9825
+size 4999776384
diff --git a/model-00131-of-00193.safetensors b/model-00131-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..25005c75b36839d5936bbf896f302a30d20d52ac
--- /dev/null
+++ b/model-00131-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e660c41a65f98c2d7130aa54a2a9c0fe44d6c63f60c04e3a8c6d8d2002a973a8
+size 4697621832
diff --git a/model-00132-of-00193.safetensors b/model-00132-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4154343c4375259cddfcc83b0cdbb3d509995188
--- /dev/null
+++ b/model-00132-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:110e29ac22893e606f9e9999301166b6743aef30b18f840c1cc3ca340b9f711f
+size 4697621832
diff --git a/model-00133-of-00193.safetensors b/model-00133-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0cd30ef6b34f75f92172307f20fc9e7fdba64f1b
--- /dev/null
+++ b/model-00133-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:925a25a8ed44ca0812cf12214097719c58981951b2b063ca08cfde78116e6768
+size 4999776384
diff --git a/model-00134-of-00193.safetensors b/model-00134-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5877cc1b86a572e914bf5b5683d570eedb47a6d3
--- /dev/null
+++ b/model-00134-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:126bb45fdf8626a647123038fff285af8a6dadfaede190ed8194b41d9e9df8a9
+size 4697621832
diff --git a/model-00135-of-00193.safetensors b/model-00135-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cf002cc03186363e4d7e4bbe77f3e798161d02a3
--- /dev/null
+++ b/model-00135-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ce919ce1288871c0c6522495946bd4c1c473f3515e349846410149e620285fc
+size 4999776384
diff --git a/model-00136-of-00193.safetensors b/model-00136-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..741339bd65eb4fe6e6a37621fc40433d6ad6b068
--- /dev/null
+++ b/model-00136-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3fc8383cd930cfc5eb4c6843158cd3db4c25e5920f057c8502feb62d54d7c37
+size 4697621832
diff --git a/model-00137-of-00193.safetensors b/model-00137-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bdc890becfe0b2e6cd2576d33f1b356f2ce356e4
--- /dev/null
+++ b/model-00137-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88eed1cfe1b8de28aa000e907092ef7814f21d462e0d290f38f323fedc9deb45
+size 4697621832
diff --git a/model-00138-of-00193.safetensors b/model-00138-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f217686479cb30a391e2250ce5474c8baedf9f42
--- /dev/null
+++ b/model-00138-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a068f18e989eae2b91390225f7373ee9c56e13d7d8ee6f447f4b0bfb387a25f2
+size 4999776376
diff --git a/model-00139-of-00193.safetensors b/model-00139-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..acc5ad183cfe6b4e080200dbdc91540e4e8388b1
--- /dev/null
+++ b/model-00139-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3ec5d0ec8dde7a1248a1cf02e66f347a87b0e141a4e7d8c2e1f2260a3276418
+size 4697621832
diff --git a/model-00140-of-00193.safetensors b/model-00140-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cd52f1552a21bbe9facb6af6ac0d49b0fedbaabb
--- /dev/null
+++ b/model-00140-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10cf4a675aa55b0423bddb98fe31d3d1ebc29f8dd8f9ca364ecc7192c68aa990
+size 4999776384
diff --git a/model-00141-of-00193.safetensors b/model-00141-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bfacc1f93207c2be9d7f365a128aeb2e83d9fb94
--- /dev/null
+++ b/model-00141-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4689a3c289cbb3fbad6f0a4bd6b862d5239f7a94c8a5e83b0d14fc20e6fb36e0
+size 4697621832
diff --git a/model-00142-of-00193.safetensors b/model-00142-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bf493f191b7a34cfa39af8c9eaedf8c59a2eab0b
--- /dev/null
+++ b/model-00142-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8de525df897fba17317140825d02c6150047befdc29754a07fc7d5dcf421122d
+size 4999776384
diff --git a/model-00143-of-00193.safetensors b/model-00143-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5f85c0944173f122bcf7c91ef5133ebaa21c64c8
--- /dev/null
+++ b/model-00143-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1da5e5f8dfb1183c61b622e2278b0c2f4f74be1e2ff633ebc5b31c5b129e9ca2
+size 4697621832
diff --git a/model-00144-of-00193.safetensors b/model-00144-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..468a419ddc0c289c51c5c804144f6786a5772fa4
--- /dev/null
+++ b/model-00144-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd59e15d6317def9b273addc31fee05cc5d4d9caafc5dfa95dc0c3d1c482a174
+size 4697621832
diff --git a/model-00145-of-00193.safetensors b/model-00145-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..497bcd487567bc204b02219ff0187dd74c4522af
--- /dev/null
+++ b/model-00145-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb0ff8ddf9d45372bb79cc1e1d284c3d6556d4f6fde4856c9e147e2ab46002d6
+size 4999776384
diff --git a/model-00146-of-00193.safetensors b/model-00146-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a2dbe1cb084bb50bdf8fe979c403245cfde628f0
--- /dev/null
+++ b/model-00146-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca2d6a93888d951daa9d1b715e4eaacc5423e15cdd464dfed996cde211b7278a
+size 4697621832
diff --git a/model-00147-of-00193.safetensors b/model-00147-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..176a065e05156ab4520980a77b7f13fe1355e87e
--- /dev/null
+++ b/model-00147-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4a2059a767a8c18b14f729348f15218521dd2d57648f68179800b3eaca91715
+size 4999776384
diff --git a/model-00148-of-00193.safetensors b/model-00148-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9518f507eeaaf79367613b2187fd6efdbf119f09
--- /dev/null
+++ b/model-00148-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90d0d8f31c373756e4de60dbe5d952d6aa590e1abea7619d9c109821d26b6561
+size 4697621832
diff --git a/model-00149-of-00193.safetensors b/model-00149-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc789789e40e7edf51eb2254f13de9813f098b60
--- /dev/null
+++ b/model-00149-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4fcab319563045c071bd7f02cdd03baa711959c860a3039bd808bb0ab9cf311
+size 4697621832
diff --git a/model-00150-of-00193.safetensors b/model-00150-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..382efe12709e8f51aad9a36aa58f92a4ae59b540
--- /dev/null
+++ b/model-00150-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5da83c29b33ca958ccb04984fbb469874f37f0653733421e52a09678566b0f3
+size 4999776376
diff --git a/model-00151-of-00193.safetensors b/model-00151-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f627f0cd63737f9b2b35af2df47f817173028d7a
--- /dev/null
+++ b/model-00151-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:444a51010a38794edd52a6bc822fdebcfa2dd84d86a1ffb0d5e2cb60d183de78
+size 4697621832
diff --git a/model-00152-of-00193.safetensors b/model-00152-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0465e4af865570ce437760ffa37000e9897d33d2
--- /dev/null
+++ b/model-00152-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bde4309d332177b39cb4c59b77b685edd861d1f920d280c214f1e600e8bcd1de
+size 4999776384
diff --git a/model-00153-of-00193.safetensors b/model-00153-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b3d071125ca0d7c620a1603b370430d33b054342
--- /dev/null
+++ b/model-00153-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d045926a006646b6a5721c85ee9a716861c1b9aac4c172a9f10c5f6d14525f5
+size 4697621832
diff --git a/model-00154-of-00193.safetensors b/model-00154-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a018d9b4087454763ea805320bb63843a5909d1c
--- /dev/null
+++ b/model-00154-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d7e14ec8337fa2a4ff214e6901bf417889a28a5dafb287fa0f50796ce162c92c
+size 4999776384
diff --git a/model-00155-of-00193.safetensors b/model-00155-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f1ce407a46f0da385cc7e95dccad5c0f8a714737
--- /dev/null
+++ b/model-00155-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:261762864b33ef1588b2c0c4d23d646bfe077458c8df300538850b4775601e4c
+size 4697621832
diff --git a/model-00156-of-00193.safetensors b/model-00156-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..beeb42ca77384ab124b69307bd82780463537b94
--- /dev/null
+++ b/model-00156-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b58754d7d13a954822d75ba7938f39de374f4ebe254e1bc3161cb934e698803b
+size 4697621832
diff --git a/model-00157-of-00193.safetensors b/model-00157-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2b9e565623bdafeb2d541abfc6e989a3828adb02
--- /dev/null
+++ b/model-00157-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc95ca083a16e5f48229ecc35793f81b80b34bb25a4acf828c87a83960956c39
+size 4999776384
diff --git a/model-00158-of-00193.safetensors b/model-00158-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6114b4df5d2b0f0227c699edbc7711933dc445f6
--- /dev/null
+++ b/model-00158-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3bdbafd497e186ead6eeac135bfa7803d1a12a29b6b4198a961542e92972fa5a
+size 4697621832
diff --git a/model-00159-of-00193.safetensors b/model-00159-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..477616e2e736fa5ef957f3a6cd42434443d03afb
--- /dev/null
+++ b/model-00159-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f03a0d3f9040c798335509fa71dd561b223bf39e6a1ac6f8f20cfaf85d8f83e9
+size 4999776384
diff --git a/model-00160-of-00193.safetensors b/model-00160-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3e878f3b18ec2ca0127a3bf62142f4b94a9d8960
--- /dev/null
+++ b/model-00160-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1452a9496aa9b0c12c5e45a1b4ea0b32d4e1310bb9e3d0c01a7579fec566b5ce
+size 4697621832
diff --git a/model-00161-of-00193.safetensors b/model-00161-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0283dd343a655ba1d4811922ee186a7b98a6d923
--- /dev/null
+++ b/model-00161-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a785ef6e98ef89e0201f7d65429643e786ac68167f574163263987ce0f1fbe0f
+size 4697621832
diff --git a/model-00162-of-00193.safetensors b/model-00162-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89da60b7b790d965f593267d91d65cf03e636b5e
--- /dev/null
+++ b/model-00162-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c5ba67ebbc6cabfd9a2b63c22b950440640429a4722335e0e980ef7cdb76ca03
+size 4999776376
diff --git a/model-00163-of-00193.safetensors b/model-00163-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2b33b50f9e0d7d7e3e1be721eec36d7da406b7e6
--- /dev/null
+++ b/model-00163-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ede8434dabd70cb7195dabb6b6620a8d8fcef66cf53e973dc3cd87fd9c5752de
+size 4697621832
diff --git a/model-00164-of-00193.safetensors b/model-00164-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3f87c9a833c2a0a2e4f479de8dab02eda3c7b304
--- /dev/null
+++ b/model-00164-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:150f015a41af36ee9c3145678e9ca953ba16f0b53b6ce2a4d39727fbae9547c2
+size 4999776384
diff --git a/model-00165-of-00193.safetensors b/model-00165-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9c23e5b51bc33ba0df2a751c669d0f56dca58d9d
--- /dev/null
+++ b/model-00165-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c27d9aed540fe855dd333b85497e7257c923a3b356709cae2b022909c7dbaee0
+size 4697621832
diff --git a/model-00166-of-00193.safetensors b/model-00166-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ea61ecfc766f4054f13ab43264facf6b1b2c807d
--- /dev/null
+++ b/model-00166-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6579719faf3b1651d1e863b876d5703379146d94cb2d36e0f645fe7fc1acdce
+size 4999776384
diff --git a/model-00167-of-00193.safetensors b/model-00167-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..591bcc6974f15ed776b187ae9dd49d8759d6ce66
--- /dev/null
+++ b/model-00167-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dd44065971478845f05a9ba7338792cfabaa2b7bb9da1c9f3509b4451bd4de8
+size 4697621832
diff --git a/model-00168-of-00193.safetensors b/model-00168-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b1ab6cc0298862474d058510290171debd544bc7
--- /dev/null
+++ b/model-00168-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66b1328939130e6d5132887563737669b765df64ce60d2e6ba3d7dee70d81db4
+size 4697621832
diff --git a/model-00169-of-00193.safetensors b/model-00169-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b67a7b399dbbb44f1eced4770c0d9a028b9ca3e
--- /dev/null
+++ b/model-00169-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea110126c92ff7abed12927c59dbf9405afd3e5b3d2f8a926417c301af826dfe
+size 4999776384
diff --git a/model-00170-of-00193.safetensors b/model-00170-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f96e494e250d76e131c1ceed00780377844cd06
--- /dev/null
+++ b/model-00170-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b2de104d5957d20eb75005c6b0370413a8b362dd2d206b17a25ece9e64a50d5
+size 4697621832
diff --git a/model-00171-of-00193.safetensors b/model-00171-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0f5548212ed8ab7b4c130f4174f736a505ddc846
--- /dev/null
+++ b/model-00171-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ca45a4797f9d6622d57d9f88e84a39d43b65b4b9f8e0dbc3e29c0740312f6028
+size 4999776384
diff --git a/model-00172-of-00193.safetensors b/model-00172-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f3eb014b7b0da718232d6770b0a1294fbaabd8bd
--- /dev/null
+++ b/model-00172-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea725bcbfffb34a271e647db518e4f1507efdb30c997f2ef2b631e0f5914e5e
+size 4697621832
diff --git a/model-00173-of-00193.safetensors b/model-00173-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ae0688fe9a0d8c315b7a7c13f30b98d617339c77
--- /dev/null
+++ b/model-00173-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5d7030d3ad2e0024b1921e9d6c7b6a40ccad4351f91aeba61755b25919014c1
+size 4697621832
diff --git a/model-00174-of-00193.safetensors b/model-00174-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8629cb4a2f706375ad499b106bfa3a67fda5b4d4
--- /dev/null
+++ b/model-00174-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8d1c690afb97e86a95590cbf23b3ed7af88e40470fc5f8a94b1d20c161a33f6
+size 4999776376
diff --git a/model-00175-of-00193.safetensors b/model-00175-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4eae12449a381c8a6a88b7cfeae5c44d4c606a85
--- /dev/null
+++ b/model-00175-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d28138a0069504752efa3f608a7ee42e8dc2959d0073b518122f50ac8baf5bf
+size 4697621832
diff --git a/model-00176-of-00193.safetensors b/model-00176-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6afe7a2a1f7ceed7452bb5d4006e2a0aefc79554
--- /dev/null
+++ b/model-00176-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0068977aef54a5f418b76edf25d212b4763dcc98ca9368bf743bd392e04f3275
+size 4999776384
diff --git a/model-00177-of-00193.safetensors b/model-00177-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0b0805768bb8e71887960b60d44221e53eb7648b
--- /dev/null
+++ b/model-00177-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53b8d81993e35d0f2d3ed8f32039ff2be71d81be73d27ea0dad8e4926585ca17
+size 4697621832
diff --git a/model-00178-of-00193.safetensors b/model-00178-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5b7d51f984db1e5b930c7008809af5f83b598a52
--- /dev/null
+++ b/model-00178-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c47d29e32e0a3f11ebdb903eb6fb02b812679d1e1c429195a63b5fd14a0b98c
+size 4999776384
diff --git a/model-00179-of-00193.safetensors b/model-00179-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c33bf6039ba088515f05b07c461ed9e9159c693e
--- /dev/null
+++ b/model-00179-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6864c1e6ee363834ff838b68827965583d90086758ad5e6243c6b3df86843b5b
+size 4697621832
diff --git a/model-00180-of-00193.safetensors b/model-00180-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..136287d31a0262637b71e4d2933ab6f50cfe4c3e
--- /dev/null
+++ b/model-00180-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:daf8ef963b07f158a4189a1070ed2a02404884ebc9f8d9c1723bd9da4d2a215d
+size 4697621832
diff --git a/model-00181-of-00193.safetensors b/model-00181-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e04d65093ffd593ca4c85f720da5e88de1e3b77c
--- /dev/null
+++ b/model-00181-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3336066e10f5b54758a0039f396ce589c407c88b55454a31ccfdd1b44cbd71c
+size 4999776384
diff --git a/model-00182-of-00193.safetensors b/model-00182-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..621fcdd28e05b6d55631a9b2efff71dc682c7356
--- /dev/null
+++ b/model-00182-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0169e473f87e2da7ecd2678abbf6f7cb68ccae0adf064d9e32b389e0bf050d4e
+size 4697621832
diff --git a/model-00183-of-00193.safetensors b/model-00183-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3bfea5c83331b8c96ac0d2faa1d94a02b154fc44
--- /dev/null
+++ b/model-00183-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c8d3a5903f4d2a997c5de29c6b8b6aa78e685f004c685c60be02271e0a9f30e
+size 4999776384
diff --git a/model-00184-of-00193.safetensors b/model-00184-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..78cade2b6d997bdc6fb8b5056205671288b7dd86
--- /dev/null
+++ b/model-00184-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fabfa52e7bf7954bb0376a507f823cc947d821a27d2ad852e3c09d5c85bad310
+size 4697621832
diff --git a/model-00185-of-00193.safetensors b/model-00185-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6dadce1b372d50363ce8f21b0b6d3b7604a844dd
--- /dev/null
+++ b/model-00185-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f87ba64d02c5cf134f68f06fd68543d64119388a53f7d5f318911c1cf9a9b3e
+size 4697621832
diff --git a/model-00186-of-00193.safetensors b/model-00186-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8068d98412004ff3aa98a4503ebacc8a74eeee44
--- /dev/null
+++ b/model-00186-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bff7e86b5f06044ee16861c9ca64558b3750fdf85516ffe079c3d5e01d7ceccc
+size 4999776376
diff --git a/model-00187-of-00193.safetensors b/model-00187-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9d4d3c8eb976491c69e8d79d54abcc90418d9906
--- /dev/null
+++ b/model-00187-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f4a6658e76322c3cce8100a78e10887dc164d6a45cda22caa70b1d0b8c13df5
+size 4697621832
diff --git a/model-00188-of-00193.safetensors b/model-00188-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..56e22353620b61a59b70a5aca3e54a5184a24d3b
--- /dev/null
+++ b/model-00188-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1114e67b5529b00f45efae2a23ca753a7db9229ebeab23ba0e0e3ed2117cac6c
+size 4999776384
diff --git a/model-00189-of-00193.safetensors b/model-00189-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7fb36da8fe16554147fa6fde31d1389bb7d5cb0a
--- /dev/null
+++ b/model-00189-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a92dddcbad6c4e90d2756dd476d66d50ac42a208dd667fd878939806bb6f637
+size 4697621832
diff --git a/model-00190-of-00193.safetensors b/model-00190-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bc536f6d8e281c8898ac040d44d78fff34062971
--- /dev/null
+++ b/model-00190-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fef5e43e2f1c4b203feae387e6c9c44e67aeb1313c8ca875955727538083e74e
+size 4999776384
diff --git a/model-00191-of-00193.safetensors b/model-00191-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c0f8fa53436def7b976f5c82740ba6e33a15b8e1
--- /dev/null
+++ b/model-00191-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aaa456d45ff800e87b0a2fed7993dcd0febaed70a2de1f5dc3987ade4403d5cb
+size 4697621832
diff --git a/model-00192-of-00193.safetensors b/model-00192-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..25e7c75d6f4981ef586e5b70a1c10806dcf9924d
--- /dev/null
+++ b/model-00192-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ba83361fd13982e136d787838f88ee3fe72aba6021f50b11dc63e0e8c285bff
+size 4697621832
diff --git a/model-00193-of-00193.safetensors b/model-00193-of-00193.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a6968b90d4f7765f1704ea213d47fd91ac442d36
--- /dev/null
+++ b/model-00193-of-00193.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd5df7cfbc689f09a98e6f21ce3f8e5a3148135a32d1287ce6808f8a2aacab40
+size 3556819928
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..697fb2e6d8d7810996873a10f8e09cd5f9faba1f
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,2490 @@
+{
+ "metadata": {
+ "total_size": 929470889984
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00193-of-00193.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00193.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00193.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00003-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00003-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00193.safetensors",
+ "model.layers.1.block_sparse_moe.gate.weight": "model-00003-of-00193.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00006-of-00193.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00006-of-00193.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00003-of-00193.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00003-of-00193.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00003-of-00193.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00003-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00026-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.block_sparse_moe.gate.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00027-of-00193.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00025-of-00193.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00025-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00029-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00193.safetensors",
+ "model.layers.11.block_sparse_moe.gate.weight": "model-00027-of-00193.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00030-of-00193.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00030-of-00193.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00027-of-00193.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00027-of-00193.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00027-of-00193.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00027-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.block_sparse_moe.gate.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00032-of-00193.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00030-of-00193.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00030-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.block_sparse_moe.gate.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00034-of-00193.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00032-of-00193.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00032-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00193.safetensors",
+ "model.layers.14.block_sparse_moe.gate.weight": "model-00034-of-00193.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00037-of-00193.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00037-of-00193.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00034-of-00193.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00034-of-00193.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00034-of-00193.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00034-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.block_sparse_moe.gate.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00039-of-00193.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00037-of-00193.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00037-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00041-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00193.safetensors",
+ "model.layers.16.block_sparse_moe.gate.weight": "model-00039-of-00193.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00042-of-00193.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00042-of-00193.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00039-of-00193.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00039-of-00193.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00039-of-00193.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00039-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.block_sparse_moe.gate.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00044-of-00193.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00042-of-00193.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00042-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.block_sparse_moe.gate.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00046-of-00193.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00044-of-00193.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00044-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00048-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00193.safetensors",
+ "model.layers.19.block_sparse_moe.gate.weight": "model-00046-of-00193.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00049-of-00193.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00049-of-00193.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00046-of-00193.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00046-of-00193.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00046-of-00193.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00046-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00007-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.block_sparse_moe.gate.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00008-of-00193.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00006-of-00193.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00006-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00050-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.block_sparse_moe.gate.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00051-of-00193.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00049-of-00193.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00049-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00051-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00051-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00052-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00053-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00054-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00054-of-00193.safetensors",
+ "model.layers.21.block_sparse_moe.gate.weight": "model-00051-of-00193.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00054-of-00193.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00054-of-00193.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00051-of-00193.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00051-of-00193.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00051-of-00193.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00051-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.block_sparse_moe.gate.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00056-of-00193.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00054-of-00193.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00054-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.block_sparse_moe.gate.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00058-of-00193.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00056-of-00193.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00056-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00059-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00060-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00193.safetensors",
+ "model.layers.24.block_sparse_moe.gate.weight": "model-00058-of-00193.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00061-of-00193.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00061-of-00193.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00058-of-00193.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00058-of-00193.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00058-of-00193.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00058-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00062-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.block_sparse_moe.gate.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00063-of-00193.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00061-of-00193.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00061-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00063-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00063-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00064-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00065-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00066-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00066-of-00193.safetensors",
+ "model.layers.26.block_sparse_moe.gate.weight": "model-00063-of-00193.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00066-of-00193.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00066-of-00193.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00063-of-00193.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00063-of-00193.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00063-of-00193.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00063-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.block_sparse_moe.gate.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00068-of-00193.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00066-of-00193.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00066-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.block_sparse_moe.gate.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00070-of-00193.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00068-of-00193.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00068-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00071-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00072-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00193.safetensors",
+ "model.layers.29.block_sparse_moe.gate.weight": "model-00070-of-00193.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00073-of-00193.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00073-of-00193.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00070-of-00193.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00070-of-00193.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00070-of-00193.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00070-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.block_sparse_moe.gate.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00010-of-00193.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00008-of-00193.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00008-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00074-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.block_sparse_moe.gate.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00075-of-00193.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00073-of-00193.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00073-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00075-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00075-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00076-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00077-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00078-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00078-of-00193.safetensors",
+ "model.layers.31.block_sparse_moe.gate.weight": "model-00075-of-00193.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00078-of-00193.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00078-of-00193.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00075-of-00193.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00075-of-00193.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00075-of-00193.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00075-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.0.w1.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.0.w2.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.0.w3.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.1.w1.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.1.w2.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.1.w3.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.2.w1.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.2.w2.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.2.w3.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.3.w1.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.3.w2.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.3.w3.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.4.w1.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.4.w2.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.4.w3.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.5.w1.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.6.w1.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.6.w2.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.6.w3.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.7.w1.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.7.w2.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.experts.7.w3.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.block_sparse_moe.gate.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.input_layernorm.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.post_attention_layernorm.weight": "model-00080-of-00193.safetensors",
+ "model.layers.32.self_attn.k_proj.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.self_attn.o_proj.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.self_attn.q_proj.weight": "model-00078-of-00193.safetensors",
+ "model.layers.32.self_attn.v_proj.weight": "model-00078-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.1.w2.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.1.w3.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.2.w1.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.2.w2.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.4.w3.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.5.w1.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.5.w2.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.5.w3.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.6.w1.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.block_sparse_moe.gate.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.input_layernorm.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.post_attention_layernorm.weight": "model-00082-of-00193.safetensors",
+ "model.layers.33.self_attn.k_proj.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.self_attn.o_proj.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.self_attn.q_proj.weight": "model-00080-of-00193.safetensors",
+ "model.layers.33.self_attn.v_proj.weight": "model-00080-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.2.w3.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.3.w1.weight": "model-00083-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.5.w2.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.5.w3.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.6.w1.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.6.w2.weight": "model-00084-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.6.w3.weight": "model-00085-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.7.w1.weight": "model-00085-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.7.w2.weight": "model-00085-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.experts.7.w3.weight": "model-00085-of-00193.safetensors",
+ "model.layers.34.block_sparse_moe.gate.weight": "model-00082-of-00193.safetensors",
+ "model.layers.34.input_layernorm.weight": "model-00085-of-00193.safetensors",
+ "model.layers.34.post_attention_layernorm.weight": "model-00085-of-00193.safetensors",
+ "model.layers.34.self_attn.k_proj.weight": "model-00082-of-00193.safetensors",
+ "model.layers.34.self_attn.o_proj.weight": "model-00082-of-00193.safetensors",
+ "model.layers.34.self_attn.q_proj.weight": "model-00082-of-00193.safetensors",
+ "model.layers.34.self_attn.v_proj.weight": "model-00082-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.0.w1.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.0.w2.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.0.w3.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.1.w1.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.1.w2.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.1.w3.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.2.w1.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.2.w2.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.2.w3.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.3.w1.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.3.w2.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.3.w3.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.4.w1.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.4.w2.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.4.w3.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.5.w1.weight": "model-00086-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.5.w2.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.5.w3.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.6.w1.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.6.w2.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.6.w3.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.7.w1.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.7.w2.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.experts.7.w3.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.block_sparse_moe.gate.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.input_layernorm.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.post_attention_layernorm.weight": "model-00087-of-00193.safetensors",
+ "model.layers.35.self_attn.k_proj.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.self_attn.o_proj.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.self_attn.q_proj.weight": "model-00085-of-00193.safetensors",
+ "model.layers.35.self_attn.v_proj.weight": "model-00085-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.0.w1.weight": "model-00087-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.0.w2.weight": "model-00087-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.0.w3.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.1.w1.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.1.w2.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.1.w3.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.2.w1.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.2.w2.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.2.w3.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.3.w1.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.3.w2.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.3.w3.weight": "model-00088-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.4.w1.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.4.w2.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.4.w3.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.5.w1.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.5.w2.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.5.w3.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.6.w1.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.6.w2.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.6.w3.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.7.w1.weight": "model-00089-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.7.w2.weight": "model-00090-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.experts.7.w3.weight": "model-00090-of-00193.safetensors",
+ "model.layers.36.block_sparse_moe.gate.weight": "model-00087-of-00193.safetensors",
+ "model.layers.36.input_layernorm.weight": "model-00090-of-00193.safetensors",
+ "model.layers.36.post_attention_layernorm.weight": "model-00090-of-00193.safetensors",
+ "model.layers.36.self_attn.k_proj.weight": "model-00087-of-00193.safetensors",
+ "model.layers.36.self_attn.o_proj.weight": "model-00087-of-00193.safetensors",
+ "model.layers.36.self_attn.q_proj.weight": "model-00087-of-00193.safetensors",
+ "model.layers.36.self_attn.v_proj.weight": "model-00087-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.0.w1.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.0.w2.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.0.w3.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.1.w1.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.1.w2.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.1.w3.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.2.w1.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.2.w2.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.2.w3.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.3.w1.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.3.w2.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.3.w3.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.4.w1.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.4.w2.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.4.w3.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.5.w1.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.5.w2.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.5.w3.weight": "model-00091-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.6.w1.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.6.w2.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.6.w3.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.7.w1.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.7.w2.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.experts.7.w3.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.block_sparse_moe.gate.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.input_layernorm.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.post_attention_layernorm.weight": "model-00092-of-00193.safetensors",
+ "model.layers.37.self_attn.k_proj.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.self_attn.o_proj.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.self_attn.q_proj.weight": "model-00090-of-00193.safetensors",
+ "model.layers.37.self_attn.v_proj.weight": "model-00090-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.1.w1.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.1.w2.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.1.w3.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.2.w1.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.2.w2.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.4.w2.weight": "model-00093-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.4.w3.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.5.w1.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.block_sparse_moe.gate.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.input_layernorm.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.post_attention_layernorm.weight": "model-00094-of-00193.safetensors",
+ "model.layers.38.self_attn.k_proj.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.self_attn.o_proj.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.self_attn.q_proj.weight": "model-00092-of-00193.safetensors",
+ "model.layers.38.self_attn.v_proj.weight": "model-00092-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.2.w3.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.3.w1.weight": "model-00095-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.3.w2.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.3.w3.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.5.w2.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.5.w3.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.6.w1.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.6.w2.weight": "model-00096-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.6.w3.weight": "model-00097-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.7.w1.weight": "model-00097-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.7.w2.weight": "model-00097-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.experts.7.w3.weight": "model-00097-of-00193.safetensors",
+ "model.layers.39.block_sparse_moe.gate.weight": "model-00094-of-00193.safetensors",
+ "model.layers.39.input_layernorm.weight": "model-00097-of-00193.safetensors",
+ "model.layers.39.post_attention_layernorm.weight": "model-00097-of-00193.safetensors",
+ "model.layers.39.self_attn.k_proj.weight": "model-00094-of-00193.safetensors",
+ "model.layers.39.self_attn.o_proj.weight": "model-00094-of-00193.safetensors",
+ "model.layers.39.self_attn.q_proj.weight": "model-00094-of-00193.safetensors",
+ "model.layers.39.self_attn.v_proj.weight": "model-00094-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00193.safetensors",
+ "model.layers.4.block_sparse_moe.gate.weight": "model-00010-of-00193.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00013-of-00193.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00013-of-00193.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00010-of-00193.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00010-of-00193.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00010-of-00193.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00010-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.0.w1.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.0.w2.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.0.w3.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.1.w1.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.1.w2.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.1.w3.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.2.w1.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.2.w2.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.2.w3.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.3.w1.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.3.w2.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.3.w3.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.4.w1.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.4.w2.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.4.w3.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.5.w1.weight": "model-00098-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.5.w2.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.5.w3.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.6.w1.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.6.w2.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.6.w3.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.7.w1.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.7.w2.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.experts.7.w3.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.block_sparse_moe.gate.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.input_layernorm.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.post_attention_layernorm.weight": "model-00099-of-00193.safetensors",
+ "model.layers.40.self_attn.k_proj.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.self_attn.o_proj.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.self_attn.q_proj.weight": "model-00097-of-00193.safetensors",
+ "model.layers.40.self_attn.v_proj.weight": "model-00097-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.0.w1.weight": "model-00099-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.0.w2.weight": "model-00099-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.0.w3.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.1.w1.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.1.w2.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.1.w3.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.2.w1.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.2.w2.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.2.w3.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.3.w1.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.3.w2.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.3.w3.weight": "model-00100-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.4.w1.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.4.w2.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.4.w3.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.5.w1.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.5.w2.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.5.w3.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.6.w1.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.6.w2.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.6.w3.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.7.w1.weight": "model-00101-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.7.w2.weight": "model-00102-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.experts.7.w3.weight": "model-00102-of-00193.safetensors",
+ "model.layers.41.block_sparse_moe.gate.weight": "model-00099-of-00193.safetensors",
+ "model.layers.41.input_layernorm.weight": "model-00102-of-00193.safetensors",
+ "model.layers.41.post_attention_layernorm.weight": "model-00102-of-00193.safetensors",
+ "model.layers.41.self_attn.k_proj.weight": "model-00099-of-00193.safetensors",
+ "model.layers.41.self_attn.o_proj.weight": "model-00099-of-00193.safetensors",
+ "model.layers.41.self_attn.q_proj.weight": "model-00099-of-00193.safetensors",
+ "model.layers.41.self_attn.v_proj.weight": "model-00099-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.0.w1.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.0.w2.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.0.w3.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.1.w1.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.1.w2.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.1.w3.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.2.w1.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.2.w2.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.2.w3.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.3.w1.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.3.w2.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.3.w3.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.4.w1.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.4.w2.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.4.w3.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.5.w1.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.5.w2.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.5.w3.weight": "model-00103-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.6.w1.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.6.w2.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.6.w3.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.7.w1.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.7.w2.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.experts.7.w3.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.block_sparse_moe.gate.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.input_layernorm.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.post_attention_layernorm.weight": "model-00104-of-00193.safetensors",
+ "model.layers.42.self_attn.k_proj.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.self_attn.o_proj.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.self_attn.q_proj.weight": "model-00102-of-00193.safetensors",
+ "model.layers.42.self_attn.v_proj.weight": "model-00102-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.0.w1.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.0.w2.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.0.w3.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.1.w1.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.1.w2.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.1.w3.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.2.w1.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.2.w2.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.2.w3.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.3.w1.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.3.w2.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.3.w3.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.4.w1.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.4.w2.weight": "model-00105-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.4.w3.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.5.w1.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.5.w2.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.5.w3.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.6.w1.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.6.w2.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.6.w3.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.7.w1.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.7.w2.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.experts.7.w3.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.block_sparse_moe.gate.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.input_layernorm.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.post_attention_layernorm.weight": "model-00106-of-00193.safetensors",
+ "model.layers.43.self_attn.k_proj.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.self_attn.o_proj.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.self_attn.q_proj.weight": "model-00104-of-00193.safetensors",
+ "model.layers.43.self_attn.v_proj.weight": "model-00104-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.0.w1.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.0.w2.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.0.w3.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.1.w1.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.1.w2.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.1.w3.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.2.w1.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.2.w2.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.2.w3.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.3.w1.weight": "model-00107-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.3.w2.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.3.w3.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.4.w1.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.4.w2.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.4.w3.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.5.w1.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.5.w2.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.5.w3.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.6.w1.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.6.w2.weight": "model-00108-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.6.w3.weight": "model-00109-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.7.w1.weight": "model-00109-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.7.w2.weight": "model-00109-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.experts.7.w3.weight": "model-00109-of-00193.safetensors",
+ "model.layers.44.block_sparse_moe.gate.weight": "model-00106-of-00193.safetensors",
+ "model.layers.44.input_layernorm.weight": "model-00109-of-00193.safetensors",
+ "model.layers.44.post_attention_layernorm.weight": "model-00109-of-00193.safetensors",
+ "model.layers.44.self_attn.k_proj.weight": "model-00106-of-00193.safetensors",
+ "model.layers.44.self_attn.o_proj.weight": "model-00106-of-00193.safetensors",
+ "model.layers.44.self_attn.q_proj.weight": "model-00106-of-00193.safetensors",
+ "model.layers.44.self_attn.v_proj.weight": "model-00106-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.0.w1.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.0.w2.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.0.w3.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.1.w1.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.1.w2.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.1.w3.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.2.w1.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.2.w2.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.2.w3.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.3.w1.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.3.w2.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.3.w3.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.4.w1.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.4.w2.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.4.w3.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.5.w1.weight": "model-00110-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.5.w2.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.5.w3.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.6.w1.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.6.w2.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.6.w3.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.7.w1.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.7.w2.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.experts.7.w3.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.block_sparse_moe.gate.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.input_layernorm.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.post_attention_layernorm.weight": "model-00111-of-00193.safetensors",
+ "model.layers.45.self_attn.k_proj.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.self_attn.o_proj.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.self_attn.q_proj.weight": "model-00109-of-00193.safetensors",
+ "model.layers.45.self_attn.v_proj.weight": "model-00109-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.0.w1.weight": "model-00111-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.0.w2.weight": "model-00111-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.0.w3.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.1.w1.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.1.w2.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.1.w3.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.2.w1.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.2.w2.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.2.w3.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.3.w1.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.3.w2.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.3.w3.weight": "model-00112-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.4.w1.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.4.w2.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.4.w3.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.5.w1.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.5.w2.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.5.w3.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.6.w1.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.6.w2.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.6.w3.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.7.w1.weight": "model-00113-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.7.w2.weight": "model-00114-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.experts.7.w3.weight": "model-00114-of-00193.safetensors",
+ "model.layers.46.block_sparse_moe.gate.weight": "model-00111-of-00193.safetensors",
+ "model.layers.46.input_layernorm.weight": "model-00114-of-00193.safetensors",
+ "model.layers.46.post_attention_layernorm.weight": "model-00114-of-00193.safetensors",
+ "model.layers.46.self_attn.k_proj.weight": "model-00111-of-00193.safetensors",
+ "model.layers.46.self_attn.o_proj.weight": "model-00111-of-00193.safetensors",
+ "model.layers.46.self_attn.q_proj.weight": "model-00111-of-00193.safetensors",
+ "model.layers.46.self_attn.v_proj.weight": "model-00111-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.0.w1.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.0.w2.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.0.w3.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.1.w1.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.1.w2.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.1.w3.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.2.w1.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.2.w2.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.2.w3.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.3.w1.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.3.w2.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.3.w3.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.4.w1.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.4.w2.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.4.w3.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.5.w1.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.5.w2.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.5.w3.weight": "model-00115-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.6.w1.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.6.w2.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.6.w3.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.7.w1.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.7.w2.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.experts.7.w3.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.block_sparse_moe.gate.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.input_layernorm.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.post_attention_layernorm.weight": "model-00116-of-00193.safetensors",
+ "model.layers.47.self_attn.k_proj.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.self_attn.o_proj.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.self_attn.q_proj.weight": "model-00114-of-00193.safetensors",
+ "model.layers.47.self_attn.v_proj.weight": "model-00114-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.0.w1.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.0.w2.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.0.w3.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.1.w1.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.1.w2.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.1.w3.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.2.w1.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.2.w2.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.2.w3.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.3.w1.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.3.w2.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.3.w3.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.4.w1.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.4.w2.weight": "model-00117-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.4.w3.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.5.w1.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.5.w2.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.5.w3.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.6.w1.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.6.w2.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.6.w3.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.7.w1.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.7.w2.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.experts.7.w3.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.block_sparse_moe.gate.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.input_layernorm.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.post_attention_layernorm.weight": "model-00118-of-00193.safetensors",
+ "model.layers.48.self_attn.k_proj.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.self_attn.o_proj.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.self_attn.q_proj.weight": "model-00116-of-00193.safetensors",
+ "model.layers.48.self_attn.v_proj.weight": "model-00116-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.0.w1.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.0.w2.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.0.w3.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.1.w1.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.1.w2.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.1.w3.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.2.w1.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.2.w2.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.2.w3.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.3.w1.weight": "model-00119-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.3.w2.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.3.w3.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.4.w1.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.4.w2.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.4.w3.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.5.w1.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.5.w2.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.5.w3.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.6.w1.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.6.w2.weight": "model-00120-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.6.w3.weight": "model-00121-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.7.w1.weight": "model-00121-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.7.w2.weight": "model-00121-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.experts.7.w3.weight": "model-00121-of-00193.safetensors",
+ "model.layers.49.block_sparse_moe.gate.weight": "model-00118-of-00193.safetensors",
+ "model.layers.49.input_layernorm.weight": "model-00121-of-00193.safetensors",
+ "model.layers.49.post_attention_layernorm.weight": "model-00121-of-00193.safetensors",
+ "model.layers.49.self_attn.k_proj.weight": "model-00118-of-00193.safetensors",
+ "model.layers.49.self_attn.o_proj.weight": "model-00118-of-00193.safetensors",
+ "model.layers.49.self_attn.q_proj.weight": "model-00118-of-00193.safetensors",
+ "model.layers.49.self_attn.v_proj.weight": "model-00118-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00014-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.block_sparse_moe.gate.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00015-of-00193.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00013-of-00193.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00013-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.0.w1.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.0.w2.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.0.w3.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.1.w1.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.1.w2.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.1.w3.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.2.w1.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.2.w2.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.2.w3.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.3.w1.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.3.w2.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.3.w3.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.4.w1.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.4.w2.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.4.w3.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.5.w1.weight": "model-00122-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.5.w2.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.5.w3.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.6.w1.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.6.w2.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.6.w3.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.7.w1.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.7.w2.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.experts.7.w3.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.block_sparse_moe.gate.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.input_layernorm.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.post_attention_layernorm.weight": "model-00123-of-00193.safetensors",
+ "model.layers.50.self_attn.k_proj.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.self_attn.o_proj.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.self_attn.q_proj.weight": "model-00121-of-00193.safetensors",
+ "model.layers.50.self_attn.v_proj.weight": "model-00121-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.0.w1.weight": "model-00123-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.0.w2.weight": "model-00123-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.0.w3.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.1.w1.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.1.w2.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.1.w3.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.2.w1.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.2.w2.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.2.w3.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.3.w1.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.3.w2.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.3.w3.weight": "model-00124-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.4.w1.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.4.w2.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.4.w3.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.5.w1.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.5.w2.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.5.w3.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.6.w1.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.6.w2.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.6.w3.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.7.w1.weight": "model-00125-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.7.w2.weight": "model-00126-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.experts.7.w3.weight": "model-00126-of-00193.safetensors",
+ "model.layers.51.block_sparse_moe.gate.weight": "model-00123-of-00193.safetensors",
+ "model.layers.51.input_layernorm.weight": "model-00126-of-00193.safetensors",
+ "model.layers.51.post_attention_layernorm.weight": "model-00126-of-00193.safetensors",
+ "model.layers.51.self_attn.k_proj.weight": "model-00123-of-00193.safetensors",
+ "model.layers.51.self_attn.o_proj.weight": "model-00123-of-00193.safetensors",
+ "model.layers.51.self_attn.q_proj.weight": "model-00123-of-00193.safetensors",
+ "model.layers.51.self_attn.v_proj.weight": "model-00123-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.0.w1.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.0.w2.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.0.w3.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.1.w1.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.1.w2.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.1.w3.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.2.w1.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.2.w2.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.2.w3.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.3.w1.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.3.w2.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.3.w3.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.4.w1.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.4.w2.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.4.w3.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.5.w1.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.5.w2.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.5.w3.weight": "model-00127-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.6.w1.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.6.w2.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.6.w3.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.7.w1.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.7.w2.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.experts.7.w3.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.block_sparse_moe.gate.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.input_layernorm.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.post_attention_layernorm.weight": "model-00128-of-00193.safetensors",
+ "model.layers.52.self_attn.k_proj.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.self_attn.o_proj.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.self_attn.q_proj.weight": "model-00126-of-00193.safetensors",
+ "model.layers.52.self_attn.v_proj.weight": "model-00126-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.0.w1.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.0.w2.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.0.w3.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.1.w1.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.1.w2.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.1.w3.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.2.w1.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.2.w2.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.2.w3.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.3.w1.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.3.w2.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.3.w3.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.4.w1.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.4.w2.weight": "model-00129-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.4.w3.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.5.w1.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.5.w2.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.5.w3.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.6.w1.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.6.w2.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.6.w3.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.7.w1.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.7.w2.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.experts.7.w3.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.block_sparse_moe.gate.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.input_layernorm.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.post_attention_layernorm.weight": "model-00130-of-00193.safetensors",
+ "model.layers.53.self_attn.k_proj.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.self_attn.o_proj.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.self_attn.q_proj.weight": "model-00128-of-00193.safetensors",
+ "model.layers.53.self_attn.v_proj.weight": "model-00128-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.0.w1.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.0.w2.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.0.w3.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.1.w1.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.1.w2.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.1.w3.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.2.w1.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.2.w2.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.2.w3.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.3.w1.weight": "model-00131-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.3.w2.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.3.w3.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.4.w1.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.4.w2.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.4.w3.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.5.w1.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.5.w2.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.5.w3.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.6.w1.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.6.w2.weight": "model-00132-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.6.w3.weight": "model-00133-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.7.w1.weight": "model-00133-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.7.w2.weight": "model-00133-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.experts.7.w3.weight": "model-00133-of-00193.safetensors",
+ "model.layers.54.block_sparse_moe.gate.weight": "model-00130-of-00193.safetensors",
+ "model.layers.54.input_layernorm.weight": "model-00133-of-00193.safetensors",
+ "model.layers.54.post_attention_layernorm.weight": "model-00133-of-00193.safetensors",
+ "model.layers.54.self_attn.k_proj.weight": "model-00130-of-00193.safetensors",
+ "model.layers.54.self_attn.o_proj.weight": "model-00130-of-00193.safetensors",
+ "model.layers.54.self_attn.q_proj.weight": "model-00130-of-00193.safetensors",
+ "model.layers.54.self_attn.v_proj.weight": "model-00130-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.0.w1.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.0.w2.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.0.w3.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.1.w1.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.1.w2.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.1.w3.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.2.w1.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.2.w2.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.2.w3.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.3.w1.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.3.w2.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.3.w3.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.4.w1.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.4.w2.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.4.w3.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.5.w1.weight": "model-00134-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.5.w2.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.5.w3.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.6.w1.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.6.w2.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.6.w3.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.7.w1.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.7.w2.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.experts.7.w3.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.block_sparse_moe.gate.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.input_layernorm.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.post_attention_layernorm.weight": "model-00135-of-00193.safetensors",
+ "model.layers.55.self_attn.k_proj.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.self_attn.o_proj.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.self_attn.q_proj.weight": "model-00133-of-00193.safetensors",
+ "model.layers.55.self_attn.v_proj.weight": "model-00133-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.0.w1.weight": "model-00135-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.0.w2.weight": "model-00135-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.0.w3.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.1.w1.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.1.w2.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.1.w3.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.2.w1.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.2.w2.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.2.w3.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.3.w1.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.3.w2.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.3.w3.weight": "model-00136-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.4.w1.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.4.w2.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.4.w3.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.5.w1.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.5.w2.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.5.w3.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.6.w1.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.6.w2.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.6.w3.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.7.w1.weight": "model-00137-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.7.w2.weight": "model-00138-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.experts.7.w3.weight": "model-00138-of-00193.safetensors",
+ "model.layers.56.block_sparse_moe.gate.weight": "model-00135-of-00193.safetensors",
+ "model.layers.56.input_layernorm.weight": "model-00138-of-00193.safetensors",
+ "model.layers.56.post_attention_layernorm.weight": "model-00138-of-00193.safetensors",
+ "model.layers.56.self_attn.k_proj.weight": "model-00135-of-00193.safetensors",
+ "model.layers.56.self_attn.o_proj.weight": "model-00135-of-00193.safetensors",
+ "model.layers.56.self_attn.q_proj.weight": "model-00135-of-00193.safetensors",
+ "model.layers.56.self_attn.v_proj.weight": "model-00135-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.0.w1.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.0.w2.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.0.w3.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.1.w1.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.1.w2.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.1.w3.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.2.w1.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.2.w2.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.2.w3.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.3.w1.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.3.w2.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.3.w3.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.4.w1.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.4.w2.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.4.w3.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.5.w1.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.5.w2.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.5.w3.weight": "model-00139-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.6.w1.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.6.w2.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.6.w3.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.7.w1.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.7.w2.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.experts.7.w3.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.block_sparse_moe.gate.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.input_layernorm.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.post_attention_layernorm.weight": "model-00140-of-00193.safetensors",
+ "model.layers.57.self_attn.k_proj.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.self_attn.o_proj.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.self_attn.q_proj.weight": "model-00138-of-00193.safetensors",
+ "model.layers.57.self_attn.v_proj.weight": "model-00138-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.0.w1.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.0.w2.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.0.w3.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.1.w1.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.1.w2.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.1.w3.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.2.w1.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.2.w2.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.2.w3.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.3.w1.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.3.w2.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.3.w3.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.4.w1.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.4.w2.weight": "model-00141-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.4.w3.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.5.w1.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.5.w2.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.5.w3.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.6.w1.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.6.w2.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.6.w3.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.7.w1.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.7.w2.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.experts.7.w3.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.block_sparse_moe.gate.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.input_layernorm.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.post_attention_layernorm.weight": "model-00142-of-00193.safetensors",
+ "model.layers.58.self_attn.k_proj.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.self_attn.o_proj.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.self_attn.q_proj.weight": "model-00140-of-00193.safetensors",
+ "model.layers.58.self_attn.v_proj.weight": "model-00140-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.0.w1.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.0.w2.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.0.w3.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.1.w1.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.1.w2.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.1.w3.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.2.w1.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.2.w2.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.2.w3.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.3.w1.weight": "model-00143-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.3.w2.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.3.w3.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.4.w1.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.4.w2.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.4.w3.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.5.w1.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.5.w2.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.5.w3.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.6.w1.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.6.w2.weight": "model-00144-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.6.w3.weight": "model-00145-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.7.w1.weight": "model-00145-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.7.w2.weight": "model-00145-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.experts.7.w3.weight": "model-00145-of-00193.safetensors",
+ "model.layers.59.block_sparse_moe.gate.weight": "model-00142-of-00193.safetensors",
+ "model.layers.59.input_layernorm.weight": "model-00145-of-00193.safetensors",
+ "model.layers.59.post_attention_layernorm.weight": "model-00145-of-00193.safetensors",
+ "model.layers.59.self_attn.k_proj.weight": "model-00142-of-00193.safetensors",
+ "model.layers.59.self_attn.o_proj.weight": "model-00142-of-00193.safetensors",
+ "model.layers.59.self_attn.q_proj.weight": "model-00142-of-00193.safetensors",
+ "model.layers.59.self_attn.v_proj.weight": "model-00142-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00193.safetensors",
+ "model.layers.6.block_sparse_moe.gate.weight": "model-00015-of-00193.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00018-of-00193.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00018-of-00193.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00015-of-00193.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00015-of-00193.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00015-of-00193.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00015-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.0.w1.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.0.w2.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.0.w3.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.1.w1.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.1.w2.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.1.w3.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.2.w1.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.2.w2.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.2.w3.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.3.w1.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.3.w2.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.3.w3.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.4.w1.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.4.w2.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.4.w3.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.5.w1.weight": "model-00146-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.5.w2.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.5.w3.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.6.w1.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.6.w2.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.6.w3.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.7.w1.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.7.w2.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.experts.7.w3.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.block_sparse_moe.gate.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.input_layernorm.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.post_attention_layernorm.weight": "model-00147-of-00193.safetensors",
+ "model.layers.60.self_attn.k_proj.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.self_attn.o_proj.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.self_attn.q_proj.weight": "model-00145-of-00193.safetensors",
+ "model.layers.60.self_attn.v_proj.weight": "model-00145-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.0.w1.weight": "model-00147-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.0.w2.weight": "model-00147-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.0.w3.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.1.w1.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.1.w2.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.1.w3.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.2.w1.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.2.w2.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.2.w3.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.3.w1.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.3.w2.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.3.w3.weight": "model-00148-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.4.w1.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.4.w2.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.4.w3.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.5.w1.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.5.w2.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.5.w3.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.6.w1.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.6.w2.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.6.w3.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.7.w1.weight": "model-00149-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.7.w2.weight": "model-00150-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.experts.7.w3.weight": "model-00150-of-00193.safetensors",
+ "model.layers.61.block_sparse_moe.gate.weight": "model-00147-of-00193.safetensors",
+ "model.layers.61.input_layernorm.weight": "model-00150-of-00193.safetensors",
+ "model.layers.61.post_attention_layernorm.weight": "model-00150-of-00193.safetensors",
+ "model.layers.61.self_attn.k_proj.weight": "model-00147-of-00193.safetensors",
+ "model.layers.61.self_attn.o_proj.weight": "model-00147-of-00193.safetensors",
+ "model.layers.61.self_attn.q_proj.weight": "model-00147-of-00193.safetensors",
+ "model.layers.61.self_attn.v_proj.weight": "model-00147-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.0.w1.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.0.w2.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.0.w3.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.1.w1.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.1.w2.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.1.w3.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.2.w1.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.2.w2.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.2.w3.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.3.w1.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.3.w2.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.3.w3.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.4.w1.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.4.w2.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.4.w3.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.5.w1.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.5.w2.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.5.w3.weight": "model-00151-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.6.w1.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.6.w2.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.6.w3.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.7.w1.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.7.w2.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.experts.7.w3.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.block_sparse_moe.gate.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.input_layernorm.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.post_attention_layernorm.weight": "model-00152-of-00193.safetensors",
+ "model.layers.62.self_attn.k_proj.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.self_attn.o_proj.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.self_attn.q_proj.weight": "model-00150-of-00193.safetensors",
+ "model.layers.62.self_attn.v_proj.weight": "model-00150-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.0.w1.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.0.w2.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.0.w3.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.1.w1.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.1.w2.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.1.w3.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.2.w1.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.2.w2.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.2.w3.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.3.w1.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.3.w2.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.3.w3.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.4.w1.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.4.w2.weight": "model-00153-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.4.w3.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.5.w1.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.5.w2.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.5.w3.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.6.w1.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.6.w2.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.6.w3.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.7.w1.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.7.w2.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.experts.7.w3.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.block_sparse_moe.gate.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.input_layernorm.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.post_attention_layernorm.weight": "model-00154-of-00193.safetensors",
+ "model.layers.63.self_attn.k_proj.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.self_attn.o_proj.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.self_attn.q_proj.weight": "model-00152-of-00193.safetensors",
+ "model.layers.63.self_attn.v_proj.weight": "model-00152-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.0.w1.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.0.w2.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.0.w3.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.1.w1.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.1.w2.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.1.w3.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.2.w1.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.2.w2.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.2.w3.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.3.w1.weight": "model-00155-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.3.w2.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.3.w3.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.4.w1.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.4.w2.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.4.w3.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.5.w1.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.5.w2.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.5.w3.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.6.w1.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.6.w2.weight": "model-00156-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.6.w3.weight": "model-00157-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.7.w1.weight": "model-00157-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.7.w2.weight": "model-00157-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.experts.7.w3.weight": "model-00157-of-00193.safetensors",
+ "model.layers.64.block_sparse_moe.gate.weight": "model-00154-of-00193.safetensors",
+ "model.layers.64.input_layernorm.weight": "model-00157-of-00193.safetensors",
+ "model.layers.64.post_attention_layernorm.weight": "model-00157-of-00193.safetensors",
+ "model.layers.64.self_attn.k_proj.weight": "model-00154-of-00193.safetensors",
+ "model.layers.64.self_attn.o_proj.weight": "model-00154-of-00193.safetensors",
+ "model.layers.64.self_attn.q_proj.weight": "model-00154-of-00193.safetensors",
+ "model.layers.64.self_attn.v_proj.weight": "model-00154-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.0.w1.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.0.w2.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.0.w3.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.1.w1.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.1.w2.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.1.w3.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.2.w1.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.2.w2.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.2.w3.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.3.w1.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.3.w2.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.3.w3.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.4.w1.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.4.w2.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.4.w3.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.5.w1.weight": "model-00158-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.5.w2.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.5.w3.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.6.w1.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.6.w2.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.6.w3.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.7.w1.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.7.w2.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.experts.7.w3.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.block_sparse_moe.gate.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.input_layernorm.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.post_attention_layernorm.weight": "model-00159-of-00193.safetensors",
+ "model.layers.65.self_attn.k_proj.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.self_attn.o_proj.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.self_attn.q_proj.weight": "model-00157-of-00193.safetensors",
+ "model.layers.65.self_attn.v_proj.weight": "model-00157-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.0.w1.weight": "model-00159-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.0.w2.weight": "model-00159-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.0.w3.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.1.w1.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.1.w2.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.1.w3.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.2.w1.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.2.w2.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.2.w3.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.3.w1.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.3.w2.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.3.w3.weight": "model-00160-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.4.w1.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.4.w2.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.4.w3.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.5.w1.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.5.w2.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.5.w3.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.6.w1.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.6.w2.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.6.w3.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.7.w1.weight": "model-00161-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.7.w2.weight": "model-00162-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.experts.7.w3.weight": "model-00162-of-00193.safetensors",
+ "model.layers.66.block_sparse_moe.gate.weight": "model-00159-of-00193.safetensors",
+ "model.layers.66.input_layernorm.weight": "model-00162-of-00193.safetensors",
+ "model.layers.66.post_attention_layernorm.weight": "model-00162-of-00193.safetensors",
+ "model.layers.66.self_attn.k_proj.weight": "model-00159-of-00193.safetensors",
+ "model.layers.66.self_attn.o_proj.weight": "model-00159-of-00193.safetensors",
+ "model.layers.66.self_attn.q_proj.weight": "model-00159-of-00193.safetensors",
+ "model.layers.66.self_attn.v_proj.weight": "model-00159-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.0.w1.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.0.w2.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.0.w3.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.1.w1.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.1.w2.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.1.w3.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.2.w1.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.2.w2.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.2.w3.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.3.w1.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.3.w2.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.3.w3.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.4.w1.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.4.w2.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.4.w3.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.5.w1.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.5.w2.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.5.w3.weight": "model-00163-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.6.w1.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.6.w2.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.6.w3.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.7.w1.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.7.w2.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.experts.7.w3.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.block_sparse_moe.gate.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.input_layernorm.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.post_attention_layernorm.weight": "model-00164-of-00193.safetensors",
+ "model.layers.67.self_attn.k_proj.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.self_attn.o_proj.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.self_attn.q_proj.weight": "model-00162-of-00193.safetensors",
+ "model.layers.67.self_attn.v_proj.weight": "model-00162-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.0.w1.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.0.w2.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.0.w3.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.1.w1.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.1.w2.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.1.w3.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.2.w1.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.2.w2.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.2.w3.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.3.w1.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.3.w2.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.3.w3.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.4.w1.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.4.w2.weight": "model-00165-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.4.w3.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.5.w1.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.5.w2.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.5.w3.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.6.w1.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.6.w2.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.6.w3.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.7.w1.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.7.w2.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.experts.7.w3.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.block_sparse_moe.gate.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.input_layernorm.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.post_attention_layernorm.weight": "model-00166-of-00193.safetensors",
+ "model.layers.68.self_attn.k_proj.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.self_attn.o_proj.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.self_attn.q_proj.weight": "model-00164-of-00193.safetensors",
+ "model.layers.68.self_attn.v_proj.weight": "model-00164-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.0.w1.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.0.w2.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.0.w3.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.1.w1.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.1.w2.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.1.w3.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.2.w1.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.2.w2.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.2.w3.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.3.w1.weight": "model-00167-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.3.w2.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.3.w3.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.4.w1.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.4.w2.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.4.w3.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.5.w1.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.5.w2.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.5.w3.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.6.w1.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.6.w2.weight": "model-00168-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.6.w3.weight": "model-00169-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.7.w1.weight": "model-00169-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.7.w2.weight": "model-00169-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.experts.7.w3.weight": "model-00169-of-00193.safetensors",
+ "model.layers.69.block_sparse_moe.gate.weight": "model-00166-of-00193.safetensors",
+ "model.layers.69.input_layernorm.weight": "model-00169-of-00193.safetensors",
+ "model.layers.69.post_attention_layernorm.weight": "model-00169-of-00193.safetensors",
+ "model.layers.69.self_attn.k_proj.weight": "model-00166-of-00193.safetensors",
+ "model.layers.69.self_attn.o_proj.weight": "model-00166-of-00193.safetensors",
+ "model.layers.69.self_attn.q_proj.weight": "model-00166-of-00193.safetensors",
+ "model.layers.69.self_attn.v_proj.weight": "model-00166-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.block_sparse_moe.gate.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00020-of-00193.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00018-of-00193.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00018-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.0.w1.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.0.w2.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.0.w3.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.1.w1.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.1.w2.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.1.w3.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.2.w1.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.2.w2.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.2.w3.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.3.w1.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.3.w2.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.3.w3.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.4.w1.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.4.w2.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.4.w3.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.5.w1.weight": "model-00170-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.5.w2.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.5.w3.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.6.w1.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.6.w2.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.6.w3.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.7.w1.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.7.w2.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.experts.7.w3.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.block_sparse_moe.gate.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.input_layernorm.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.post_attention_layernorm.weight": "model-00171-of-00193.safetensors",
+ "model.layers.70.self_attn.k_proj.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.self_attn.o_proj.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.self_attn.q_proj.weight": "model-00169-of-00193.safetensors",
+ "model.layers.70.self_attn.v_proj.weight": "model-00169-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.0.w1.weight": "model-00171-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.0.w2.weight": "model-00171-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.0.w3.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.1.w1.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.1.w2.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.1.w3.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.2.w1.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.2.w2.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.2.w3.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.3.w1.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.3.w2.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.3.w3.weight": "model-00172-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.4.w1.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.4.w2.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.4.w3.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.5.w1.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.5.w2.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.5.w3.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.6.w1.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.6.w2.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.6.w3.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.7.w1.weight": "model-00173-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.7.w2.weight": "model-00174-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.experts.7.w3.weight": "model-00174-of-00193.safetensors",
+ "model.layers.71.block_sparse_moe.gate.weight": "model-00171-of-00193.safetensors",
+ "model.layers.71.input_layernorm.weight": "model-00174-of-00193.safetensors",
+ "model.layers.71.post_attention_layernorm.weight": "model-00174-of-00193.safetensors",
+ "model.layers.71.self_attn.k_proj.weight": "model-00171-of-00193.safetensors",
+ "model.layers.71.self_attn.o_proj.weight": "model-00171-of-00193.safetensors",
+ "model.layers.71.self_attn.q_proj.weight": "model-00171-of-00193.safetensors",
+ "model.layers.71.self_attn.v_proj.weight": "model-00171-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.0.w1.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.0.w2.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.0.w3.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.1.w1.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.1.w2.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.1.w3.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.2.w1.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.2.w2.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.2.w3.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.3.w1.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.3.w2.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.3.w3.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.4.w1.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.4.w2.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.4.w3.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.5.w1.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.5.w2.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.5.w3.weight": "model-00175-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.6.w1.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.6.w2.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.6.w3.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.7.w1.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.7.w2.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.experts.7.w3.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.block_sparse_moe.gate.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.input_layernorm.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.post_attention_layernorm.weight": "model-00176-of-00193.safetensors",
+ "model.layers.72.self_attn.k_proj.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.self_attn.o_proj.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.self_attn.q_proj.weight": "model-00174-of-00193.safetensors",
+ "model.layers.72.self_attn.v_proj.weight": "model-00174-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.0.w1.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.0.w2.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.0.w3.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.1.w1.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.1.w2.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.1.w3.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.2.w1.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.2.w2.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.2.w3.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.3.w1.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.3.w2.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.3.w3.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.4.w1.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.4.w2.weight": "model-00177-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.4.w3.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.5.w1.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.5.w2.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.5.w3.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.6.w1.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.6.w2.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.6.w3.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.7.w1.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.7.w2.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.experts.7.w3.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.block_sparse_moe.gate.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.input_layernorm.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.post_attention_layernorm.weight": "model-00178-of-00193.safetensors",
+ "model.layers.73.self_attn.k_proj.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.self_attn.o_proj.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.self_attn.q_proj.weight": "model-00176-of-00193.safetensors",
+ "model.layers.73.self_attn.v_proj.weight": "model-00176-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.0.w1.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.0.w2.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.0.w3.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.1.w1.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.1.w2.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.1.w3.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.2.w1.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.2.w2.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.2.w3.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.3.w1.weight": "model-00179-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.3.w2.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.3.w3.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.4.w1.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.4.w2.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.4.w3.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.5.w1.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.5.w2.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.5.w3.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.6.w1.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.6.w2.weight": "model-00180-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.6.w3.weight": "model-00181-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.7.w1.weight": "model-00181-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.7.w2.weight": "model-00181-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.experts.7.w3.weight": "model-00181-of-00193.safetensors",
+ "model.layers.74.block_sparse_moe.gate.weight": "model-00178-of-00193.safetensors",
+ "model.layers.74.input_layernorm.weight": "model-00181-of-00193.safetensors",
+ "model.layers.74.post_attention_layernorm.weight": "model-00181-of-00193.safetensors",
+ "model.layers.74.self_attn.k_proj.weight": "model-00178-of-00193.safetensors",
+ "model.layers.74.self_attn.o_proj.weight": "model-00178-of-00193.safetensors",
+ "model.layers.74.self_attn.q_proj.weight": "model-00178-of-00193.safetensors",
+ "model.layers.74.self_attn.v_proj.weight": "model-00178-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.0.w1.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.0.w2.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.0.w3.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.1.w1.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.1.w2.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.1.w3.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.2.w1.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.2.w2.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.2.w3.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.3.w1.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.3.w2.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.3.w3.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.4.w1.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.4.w2.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.4.w3.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.5.w1.weight": "model-00182-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.5.w2.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.5.w3.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.6.w1.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.6.w2.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.6.w3.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.7.w1.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.7.w2.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.experts.7.w3.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.block_sparse_moe.gate.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.input_layernorm.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.post_attention_layernorm.weight": "model-00183-of-00193.safetensors",
+ "model.layers.75.self_attn.k_proj.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.self_attn.o_proj.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.self_attn.q_proj.weight": "model-00181-of-00193.safetensors",
+ "model.layers.75.self_attn.v_proj.weight": "model-00181-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.0.w1.weight": "model-00183-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.0.w2.weight": "model-00183-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.0.w3.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.1.w1.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.1.w2.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.1.w3.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.2.w1.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.2.w2.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.2.w3.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.3.w1.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.3.w2.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.3.w3.weight": "model-00184-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.4.w1.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.4.w2.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.4.w3.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.5.w1.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.5.w2.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.5.w3.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.6.w1.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.6.w2.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.6.w3.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.7.w1.weight": "model-00185-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.7.w2.weight": "model-00186-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.experts.7.w3.weight": "model-00186-of-00193.safetensors",
+ "model.layers.76.block_sparse_moe.gate.weight": "model-00183-of-00193.safetensors",
+ "model.layers.76.input_layernorm.weight": "model-00186-of-00193.safetensors",
+ "model.layers.76.post_attention_layernorm.weight": "model-00186-of-00193.safetensors",
+ "model.layers.76.self_attn.k_proj.weight": "model-00183-of-00193.safetensors",
+ "model.layers.76.self_attn.o_proj.weight": "model-00183-of-00193.safetensors",
+ "model.layers.76.self_attn.q_proj.weight": "model-00183-of-00193.safetensors",
+ "model.layers.76.self_attn.v_proj.weight": "model-00183-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.0.w1.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.0.w2.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.0.w3.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.1.w1.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.1.w2.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.1.w3.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.2.w1.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.2.w2.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.2.w3.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.3.w1.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.3.w2.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.3.w3.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.4.w1.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.4.w2.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.4.w3.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.5.w1.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.5.w2.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.5.w3.weight": "model-00187-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.6.w1.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.6.w2.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.6.w3.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.7.w1.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.7.w2.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.experts.7.w3.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.block_sparse_moe.gate.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.input_layernorm.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.post_attention_layernorm.weight": "model-00188-of-00193.safetensors",
+ "model.layers.77.self_attn.k_proj.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.self_attn.o_proj.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.self_attn.q_proj.weight": "model-00186-of-00193.safetensors",
+ "model.layers.77.self_attn.v_proj.weight": "model-00186-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.0.w1.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.0.w2.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.0.w3.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.1.w1.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.1.w2.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.1.w3.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.2.w1.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.2.w2.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.2.w3.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.3.w1.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.3.w2.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.3.w3.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.4.w1.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.4.w2.weight": "model-00189-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.4.w3.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.5.w1.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.5.w2.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.5.w3.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.6.w1.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.6.w2.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.6.w3.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.7.w1.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.7.w2.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.experts.7.w3.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.block_sparse_moe.gate.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.input_layernorm.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.post_attention_layernorm.weight": "model-00190-of-00193.safetensors",
+ "model.layers.78.self_attn.k_proj.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.self_attn.o_proj.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.self_attn.q_proj.weight": "model-00188-of-00193.safetensors",
+ "model.layers.78.self_attn.v_proj.weight": "model-00188-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.0.w1.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.0.w2.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.0.w3.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.1.w1.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.1.w2.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.1.w3.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.2.w1.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.2.w2.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.2.w3.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.3.w1.weight": "model-00191-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.3.w2.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.3.w3.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.4.w1.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.4.w2.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.4.w3.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.5.w1.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.5.w2.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.5.w3.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.6.w1.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.6.w2.weight": "model-00192-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.6.w3.weight": "model-00193-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.7.w1.weight": "model-00193-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.7.w2.weight": "model-00193-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.experts.7.w3.weight": "model-00193-of-00193.safetensors",
+ "model.layers.79.block_sparse_moe.gate.weight": "model-00190-of-00193.safetensors",
+ "model.layers.79.input_layernorm.weight": "model-00193-of-00193.safetensors",
+ "model.layers.79.post_attention_layernorm.weight": "model-00193-of-00193.safetensors",
+ "model.layers.79.self_attn.k_proj.weight": "model-00190-of-00193.safetensors",
+ "model.layers.79.self_attn.o_proj.weight": "model-00190-of-00193.safetensors",
+ "model.layers.79.self_attn.q_proj.weight": "model-00190-of-00193.safetensors",
+ "model.layers.79.self_attn.v_proj.weight": "model-00190-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.block_sparse_moe.gate.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00022-of-00193.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00020-of-00193.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00020-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00193.safetensors",
+ "model.layers.9.block_sparse_moe.gate.weight": "model-00022-of-00193.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00025-of-00193.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00025-of-00193.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00022-of-00193.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00022-of-00193.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00022-of-00193.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00022-of-00193.safetensors",
+ "model.norm.weight": "model-00193-of-00193.safetensors"
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..69016c8865cef891c0708d4734453821d9bba334
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:008293028e1a9d9a1038d9b63d989a2319797dfeaa03f171093a57b33a3a8277
+size 1831879
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..58b40b80ea960fdd3d52734465af72cea8291687
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,135 @@
+{
+ "extra_ids": 0,
+ "do_lower_case": false,
+ "keep_accents": true,
+ "bos_token": "",
+ "eos_token": "",
+ "unk_token": "",
+ "pad_token": "",
+ "mask_token": "",
+ "cls_token": "",
+ "sep_token": "",
+ "padding_side": "left",
+ "sp_model_kwargs": {},
+ "special_tokens_map_file": null,
+ "tokenizer_class": "LlamaTokenizer",
+ "added_tokens_decoder": {
+ "4": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "5": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "6": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "7": {
+ "content": "<|system|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "8": {
+ "content": "<|assistant|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "9": {
+ "content": "<|user|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "10": {
+ "content": "<|available_tools|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "11": {
+ "content": "<|tool_calls|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "12": {
+ "content": "<|tool_results|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "13": {
+ "content": "<|code|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "14": {
+ "content": "<|file|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "102397": {
+ "content": "<|prefix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "102398": {
+ "content": "<|suffix|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ },
+ "102399": {
+ "content": "<|middle|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "add_prefix_space": false,
+ "add_dummy_prefix_space": false,
+ "legacy": false,
+ "add_bos_token": false,
+ "add_eos_token": true
+}
\ No newline at end of file