diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..41478957aca7a04b7321022e7d1f73de5badd995 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,103 @@ +[gMASK] +{%- if tools -%} +<|system|> +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within XML tags: + +{% for tool in tools %} +{{ tool | tojson(ensure_ascii=False) }} +{% endfor %} + + +For each function call, output the function name and arguments within the following XML format: +{function-name} +{arg-key-1} +{arg-value-1} +{arg-key-2} +{arg-value-2} +... +{%- endif -%} +{%- macro visible_text(content) -%} + {%- if content is string -%} + {{- content }} + {%- elif content is iterable and content is not mapping -%} + {%- for item in content -%} + {%- if item is mapping and item.type == 'text' -%} + {{- item.text }} + {%- elif item is string -%} + {{- item }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{- content }} + {%- endif -%} +{%- endmacro -%} +{%- set ns = namespace(last_user_index=-1) %} +{%- for m in messages %} + {%- if m.role == 'user' %} + {% set ns.last_user_index = loop.index0 -%} + {%- endif %} +{%- endfor %} +{% for m in messages %} +{%- if m.role == 'user' -%}<|user|> +{{ visible_text(m.content) }} +{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}} +{%- elif m.role == 'assistant' -%} +<|assistant|> +{%- set reasoning_content = '' %} +{%- set content = visible_text(m.content) %} +{%- if m.reasoning_content is string %} + {%- set reasoning_content = m.reasoning_content %} +{%- else %} + {%- if '' in content %} + {%- set reasoning_content = content.split('')[0].rstrip('\n').split('')[-1].lstrip('\n') %} + {%- set content = content.split('')[-1].lstrip('\n') %} + {%- endif %} +{%- endif %} +{%- if loop.index0 > ns.last_user_index and reasoning_content -%} +{{ '\n' + reasoning_content.strip() + ''}} +{%- else -%} +{{ '\n' }} +{%- endif -%} +{%- if content.strip() -%} +{{ '\n' + content.strip() }} +{%- endif -%} +{% if m.tool_calls %} +{% for tc in m.tool_calls %} +{%- if tc.function %} + {%- set tc = tc.function %} +{%- endif %} +{{ '\n' + tc.name }} +{% set _args = tc.arguments %} +{% for k, v in _args.items() %} +{{ k }} +{{ v | tojson(ensure_ascii=False) if v is not string else v }} +{% endfor %} +{% endfor %} +{% endif %} +{%- elif m.role == 'tool' -%} +{%- if m.content is string -%} +{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|observation|>' }} +{%- endif %} +{{- '\n\n' }} +{{- m.content }} +{{- '\n' }} +{%- else -%} +<|observation|>{% for tr in m.content %} + + +{{ tr.output if tr.output is defined else tr }} +{% endfor -%} +{% endif -%} +{%- elif m.role == 'system' -%} +<|system|> +{{ visible_text(m.content) }} +{%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + <|assistant|>{{- '\n' if (enable_thinking is defined and not enable_thinking) else '' -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..6abb595d0634b717a6d0bad9979d2f1cc68716dd --- /dev/null +++ b/config.json @@ -0,0 +1,46 @@ +{ + "architectures": [ + "Glm4MoeForCausalLM" + ], + "attention_bias": true, + "attention_dropout": 0.0, + "auto_map": { + "AutoModelForCausalLM": "modeling_glm4_moe.Glm4MoeForCausalLM" + }, + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "first_k_dense_replace": 3, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 12288, + "max_position_embeddings": 202752, + "model_type": "glm4_moe", + "moe_intermediate_size": 1536, + "n_group": 1, + "n_routed_experts": 80, + "n_shared_experts": 1, + "norm_topk_prob": true, + "num_attention_heads": 96, + "num_experts_per_tok": 8, + "num_hidden_layers": 92, + "num_key_value_heads": 8, + "num_nextn_predict_layers": 1, + "pad_token_id": 151329, + "partial_rotary_factor": 0.5, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000, + "routed_scaling_factor": 2.5, + "tie_word_embeddings": false, + "topk_group": 1, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0", + "use_cache": true, + "use_qk_norm": true, + "vocab_size": 151552 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..05b54ecc5cec6cc2cbd20f2ade90000d0bf0deb7 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,10 @@ +{ + "_from_model_config": true, + "eos_token_id": [ + 151329, + 151336, + 151338 + ], + "pad_token_id": 151329, + "transformers_version": "4.55.0" +} diff --git a/model-00001-of-00075.safetensors b/model-00001-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46ba82bf83f3121d11e9798db6573f103a3d8a04 --- /dev/null +++ b/model-00001-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2614aad7b7f125cbe6665005daf458deeb2415c4125ee5d1b49c7601676d14 +size 4986172552 diff --git a/model-00004-of-00075.safetensors b/model-00004-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b98c23ed37d49834097b2c0fc8d2b4cc5662353 --- /dev/null +++ b/model-00004-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:493b79e8398d37f09af8ed27b9c86a5262412b106de57843aac55669531b7784 +size 4914326208 diff --git a/model-00005-of-00075.safetensors b/model-00005-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab708b5556cb5165c14f747a17a98e4a698359d6 --- /dev/null +++ b/model-00005-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1183507706c9b62bc5c6cb21d50c9e35d458677be5134adbce802e054131355a +size 4997400520 diff --git a/model-00007-of-00075.safetensors b/model-00007-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92d58b0b3568d2224db12ec81cd77e125b96937b --- /dev/null +++ b/model-00007-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aa881e1ad09d159742c3e1a3e83d2c532b7017a236e6479fd5a8a3feae3a11e +size 4992129752 diff --git a/model-00008-of-00075.safetensors b/model-00008-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b15a186f64c2379731dc7a8a2a4efd0c9b4eff78 --- /dev/null +++ b/model-00008-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d89035f2d7cfa55395baadcb6d9af4fe75803566cac1924044de4cd68e393bfb +size 4992129888 diff --git a/model-00010-of-00075.safetensors b/model-00010-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbbf711f59d1ee9d3bc811d4a054d0abd316c101 --- /dev/null +++ b/model-00010-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1aaa899b941123f413c99515d60737d9204bcf9027fc71a29e44d1f9be3a5401 +size 4992129872 diff --git a/model-00016-of-00075.safetensors b/model-00016-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dcf742f756a36660cdb224c5879605e60fbe982a --- /dev/null +++ b/model-00016-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:639ea6c4185edfb1fafd584333a17217c469f357239b6579ad127167e52a137c +size 4992129888 diff --git a/model-00020-of-00075.safetensors b/model-00020-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a40aec4734474bdfecc05f2118c5a52a8fe8ddb9 --- /dev/null +++ b/model-00020-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47f9c7e0a956ceae7b24c6f40f49a3893ceb6b67fea265e0019234546789aa6 +size 4992129888 diff --git a/model-00022-of-00075.safetensors b/model-00022-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3a728a2fa2d6c4c027cabd8644a4ad374ddf7b98 --- /dev/null +++ b/model-00022-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd00ebf2d982f3953188a7fc07858f2ebc1caea9f3156bfd19e7d5ff30782f0 +size 4992129888 diff --git a/model-00023-of-00075.safetensors b/model-00023-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d9dad08de831582c992a52d6efb917f74d29a299 --- /dev/null +++ b/model-00023-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:102c92f4f24c856fff2b9bbb8061b4a8845b0b845e0491d82385059f38db340f +size 4998241280 diff --git a/model-00024-of-00075.safetensors b/model-00024-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15cdf5c3009d8be136ca84caffc105c0694bfc78 --- /dev/null +++ b/model-00024-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b66f97680b69f674faa733cadd93681989cf821b5a1abe3671df82e16369133 +size 4992129888 diff --git a/model-00025-of-00075.safetensors b/model-00025-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..464d503d8e3ac05b0561591b74c47b046b1b99ee --- /dev/null +++ b/model-00025-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47cf41434b3bae0b12f2368f1108b02eb47600cdd8e8d8f41fff05e76be933c +size 4992129888 diff --git a/model-00026-of-00075.safetensors b/model-00026-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76465451736c9869820e9844fcd88d20e5e3a3f6 --- /dev/null +++ b/model-00026-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270868fb827dcd0bc71383fc310955125461ef87954a35661feb107a4f71ff17 +size 4992129888 diff --git a/model-00027-of-00075.safetensors b/model-00027-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56f41f2ff5ab27e4501ee2c34cf1b4c96a01750c --- /dev/null +++ b/model-00027-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50938aa2a5ad56baa0dd9799632e9f4e169ab71dee59046cf68694a47379e7a3 +size 4966783928 diff --git a/model-00028-of-00075.safetensors b/model-00028-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14fd43ebfc56821dafc83037596d843e00ebab51 --- /dev/null +++ b/model-00028-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2649719678c0bff61e4e46fc51c96af3140f173db2187d5a2f3b2d20838ec829 +size 4992129712 diff --git a/model-00032-of-00075.safetensors b/model-00032-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7db8baeaea6116fe924d116ff384c1399b246ad7 --- /dev/null +++ b/model-00032-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a541c7503038db458ee565899ed2c762502d877cb21dfc68cb83ae51817d9897 +size 4998241288 diff --git a/model-00035-of-00075.safetensors b/model-00035-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c05407017e33be89795ffd0a5ff14d19667e65a3 --- /dev/null +++ b/model-00035-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d9054b9d5008ce549c20f47fce85b20c5fc20fba97e27fd1e931f41b6bac17 +size 4992129888 diff --git a/model-00037-of-00075.safetensors b/model-00037-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1cfb053f327850b4977ee4f54c3af5176706799b --- /dev/null +++ b/model-00037-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8f1a9dd3c6482fac33eee59474a07d34dc47c33a43136aa01e46ea56ee4252d +size 4997400824 diff --git a/model-00038-of-00075.safetensors b/model-00038-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4dd80034781284b4786e8eb38cd3917b0ad4d603 --- /dev/null +++ b/model-00038-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ebe15ead11ae38b3201bf5804bc2610afa277cc4a751447163e57d6c6640e7 +size 4992129888 diff --git a/model-00039-of-00075.safetensors b/model-00039-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..73a2c25a51d9abd2feb8537235138f608ad61f54 --- /dev/null +++ b/model-00039-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c130a9b072c765aaec3426883e4e5588ca991511637ff7317013386876259f6 +size 4992129888 diff --git a/model-00041-of-00075.safetensors b/model-00041-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea933bf5b7bb448beaac61d1ab6944dd837a16c1 --- /dev/null +++ b/model-00041-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3e8959a0eaaa8c84e323ca03f33e85c4b42c63a5fe861d32f9e7f716a73b994 +size 4998241296 diff --git a/model-00042-of-00075.safetensors b/model-00042-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91fd8a998220bbfcb1e8e37b7ff9e5f89722c21c --- /dev/null +++ b/model-00042-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9f8ff060a8e72f12cc73ff390ae2e1689c039c4d7ad25a8933935bd8afdd32 +size 4992129872 diff --git a/model-00043-of-00075.safetensors b/model-00043-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..867956590d899efb27bef81bb7d7f4127250fd32 --- /dev/null +++ b/model-00043-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db490d5668c50934aeadf4bddca146658f2292cba09aafd69178a6ebfafffa35 +size 4992129888 diff --git a/model-00047-of-00075.safetensors b/model-00047-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b021b5ca15ad034c234d3d2194237f23de42a9c4 --- /dev/null +++ b/model-00047-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199b559733ae5a1a9130c0b63a54d5a52dd8896c5dfddf192dd102e6c56671df +size 4992129888 diff --git a/model-00050-of-00075.safetensors b/model-00050-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..097b6ee7f3508bc6eef35460a2122a6406ab2acd --- /dev/null +++ b/model-00050-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88bc64b28d732b669e9a99daf7d6a9ba4146b9dfb63da21609e97c491d26b0fc +size 4998241304 diff --git a/model-00052-of-00075.safetensors b/model-00052-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f34ee20d4b5a9998ff00d6f333244d7dbe9f4577 --- /dev/null +++ b/model-00052-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c4b7f8275bf25321fb34015ef0e4f064068898a3408b4d64dd218dac7b59f63 +size 4992129888 diff --git a/model-00054-of-00075.safetensors b/model-00054-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..32618ca4a73585d3faf128ad24f5f27f98010f79 --- /dev/null +++ b/model-00054-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9155526068ed0b07761b84d613f9df11759c8778955d3c0cab8dddf5e513bb +size 4992129888 diff --git a/model-00055-of-00075.safetensors b/model-00055-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6e824a5b3070422b43f1e37396489375bab4a92 --- /dev/null +++ b/model-00055-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e7adb20158d493162cd8d19de5c36b92c7a1fd1a2b5a61a77b87dd32b1fc8b +size 4998241280 diff --git a/model-00056-of-00075.safetensors b/model-00056-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..997d7f93f3a3ca92fb9e67112115b1db58709033 --- /dev/null +++ b/model-00056-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e0ec8f6944758593ecd406a1538962a81ab251580273955b7caa7a6d72ae00f +size 4992129888 diff --git a/model-00059-of-00075.safetensors b/model-00059-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4e891958dc52f59fdd07296087a397021358fa0 --- /dev/null +++ b/model-00059-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40332b2c2e3073b3eadaca092cb06e85a5be36856cd3dc89ce8e13fa316f7b3d +size 4966783928 diff --git a/model-00060-of-00075.safetensors b/model-00060-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..742de15bd96bfffbf4f2bc94b7c7480a9cb473cf --- /dev/null +++ b/model-00060-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd800f47b5d18ec7a0eb7833a57087a1d5c0877ea52e62a81960c5f585ed7ab6 +size 4992129712 diff --git a/model-00061-of-00075.safetensors b/model-00061-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a0bee7e0048a280d209220b42d426ea7060a625 --- /dev/null +++ b/model-00061-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7216dd3f84d8817efcc0369c9a2e996105b4a56a04babc067cb188d53d5c303d +size 4992129888 diff --git a/model-00064-of-00075.safetensors b/model-00064-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e391998b027b9af748df3660096cafce339fc7f2 --- /dev/null +++ b/model-00064-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbcaa525dd438741cf4edde64edad984d535118cfb226f2d1df584664c118b87 +size 4998241288 diff --git a/model-00066-of-00075.safetensors b/model-00066-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6ff698e8b1f324c37a4bf936db96cc242f07588 --- /dev/null +++ b/model-00066-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8578888eb758d57317b1763e61284e302f62697fae64dbba47c7f44a5cff7eee +size 4992129888 diff --git a/model-00067-of-00075.safetensors b/model-00067-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f187e131fd91843a9dd009308268b9aa1136c77 --- /dev/null +++ b/model-00067-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dbf39269b4bdec2d96c0f5cb128fef777288e18b76fdc24e9f8d15316d343e5 +size 4992129888 diff --git a/model-00068-of-00075.safetensors b/model-00068-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34623e44dad33951ab3de89ebc575a664e274b1a --- /dev/null +++ b/model-00068-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd6da0d4f40e5652ced325a888746cf48caa17d65ee67bfa8c2ffe2a3b0e3987 +size 4961512816 diff --git a/model-00069-of-00075.safetensors b/model-00069-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3d18cc40154aa9c2c6ecf50cd2d7b713b99f2426 --- /dev/null +++ b/model-00069-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbbb42468a112a1b466ec12a2e371c41f87a5248d1e87ede340d48f11a3f4363 +size 4997400824 diff --git a/model-00071-of-00075.safetensors b/model-00071-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7001d4079df22ba971ad5ecc240291ff905dc3db --- /dev/null +++ b/model-00071-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2b09a61b9db2866fe55e000a9142b4c6f4c8e2042c92a22914f366ab5955334 +size 4992129888 diff --git a/model-00072-of-00075.safetensors b/model-00072-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b06c5b0d5ce8e748b36058ad1722f537ac8abf02 --- /dev/null +++ b/model-00072-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755b0f2c87b75ff87d0768f86ed0f60f952222a3abd335978567cbd136c45774 +size 4992129888 diff --git a/model-00074-of-00075.safetensors b/model-00074-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8738fb6adca2ad8926f834928215d780a2f8397a --- /dev/null +++ b/model-00074-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77b1a60271e8afe5762f608bab4edd3e0b1a40e3c8234065744e13e480189b11 +size 3697110504 diff --git a/model-00075-of-00075.safetensors b/model-00075-of-00075.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..898ac7fae2f21ce370168e6a67b83112a19b0ce7 --- /dev/null +++ b/model-00075-of-00075.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ed732a1f52d39814adc6a0b127bad1c816b89d7055fa07b029cba98deba7bf +size 1551892608 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..0dec2e856e925536f8bef4650ae7e82acdb5abe6 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,22837 @@ +{ + "metadata": { + "total_parameters": 184779485184, + "total_size": 369558998848 + }, + "weight_map": { + "lm_head.weight": "model-00075-of-00075.safetensors", + "model.embed_tokens.weight": "model-00001-of-00075.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00075.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.k_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.q_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00075.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.k_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.q_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.10.input_layernorm.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.0.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.0.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.0.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.1.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.1.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.1.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.10.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.10.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.10.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.11.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.11.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.11.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.12.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.12.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.12.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.13.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.13.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.13.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.14.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.14.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.14.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.15.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.15.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.15.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.16.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.16.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.16.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.17.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.17.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.17.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.18.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.18.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.18.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.19.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.19.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.19.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.2.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.2.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.2.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.20.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.20.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.20.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.21.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.21.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.21.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.22.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.22.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.22.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.23.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.23.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.23.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.24.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.24.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.24.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.25.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.25.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.25.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.26.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.26.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.26.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.27.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.27.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.27.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.28.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.28.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.28.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.29.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.29.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.29.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.3.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.3.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.3.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.30.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.30.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.30.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.31.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.31.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.31.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.32.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.32.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.32.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.33.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.33.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.33.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.34.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.34.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.34.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.35.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.35.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.35.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.36.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.36.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.36.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.37.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.37.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.37.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.38.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.38.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.38.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.39.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.39.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.39.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.4.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.4.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.4.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.40.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.40.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.40.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.41.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.41.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.41.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.42.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.42.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.42.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.43.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.43.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.43.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.44.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.44.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.44.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.45.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.45.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.45.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.46.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.46.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.46.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.47.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.47.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.47.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.48.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.48.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.48.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.49.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.49.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.49.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.5.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.5.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.5.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.50.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.50.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.50.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.51.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.51.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.51.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.52.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.52.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.52.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.53.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.53.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.53.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.54.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.54.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.54.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.55.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.55.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.55.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.56.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.56.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.56.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.57.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.57.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.57.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.58.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.58.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.58.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.59.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.59.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.59.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.6.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.6.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.6.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.60.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.60.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.60.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.61.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.61.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.61.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.62.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.62.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.62.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.63.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.63.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.63.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.64.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.64.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.64.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.65.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.65.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.65.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.66.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.66.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.66.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.67.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.67.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.67.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.68.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.68.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.68.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.69.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.69.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.69.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.7.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.7.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.7.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.70.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.70.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.70.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.71.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.71.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.71.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.72.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.72.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.72.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.73.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.73.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.73.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.74.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.74.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.74.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.75.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.75.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.75.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.76.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.76.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.76.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.77.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.77.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.77.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.78.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.78.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.78.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.79.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.79.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.79.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.experts.8.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.8.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.8.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.9.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.9.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.experts.9.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.mlp.gate.e_score_correction_bias": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.gate.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.shared_experts.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.shared_experts.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.mlp.shared_experts.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00008-of-00075.safetensors", + "model.layers.10.self_attn.k_norm.weight": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.k_proj.bias": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.q_norm.weight": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.q_proj.bias": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.v_proj.bias": "model-00007-of-00075.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.11.input_layernorm.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.0.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.0.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.0.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.1.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.1.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.1.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.10.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.10.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.10.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.11.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.11.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.11.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.12.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.12.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.12.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.13.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.13.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.13.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.14.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.14.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.14.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.15.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.15.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.15.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.16.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.16.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.16.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.17.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.17.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.17.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.18.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.18.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.18.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.19.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.19.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.19.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.2.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.2.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.2.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.20.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.20.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.20.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.21.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.21.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.21.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.22.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.22.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.22.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.23.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.23.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.23.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.24.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.24.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.24.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.25.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.25.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.25.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.26.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.26.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.26.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.27.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.27.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.27.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.28.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.28.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.28.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.29.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.29.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.29.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.3.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.3.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.3.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.30.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.30.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.30.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.31.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.31.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.31.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.32.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.32.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.32.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.33.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.33.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.33.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.34.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.34.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.34.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.35.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.35.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.35.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.36.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.36.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.36.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.37.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.37.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.37.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.38.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.38.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.38.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.39.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.39.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.39.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.4.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.4.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.4.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.40.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.40.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.40.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.41.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.41.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.41.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.42.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.42.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.42.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.43.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.43.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.43.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.44.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.44.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.44.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.45.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.45.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.45.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.46.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.46.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.46.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.47.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.47.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.47.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.48.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.48.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.48.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.49.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.49.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.49.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.5.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.5.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.5.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.50.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.50.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.50.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.51.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.51.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.51.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.52.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.52.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.52.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.53.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.53.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.53.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.54.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.54.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.54.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.55.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.55.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.55.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.56.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.56.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.56.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.57.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.57.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.57.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.58.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.58.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.58.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.59.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.59.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.59.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.6.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.6.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.6.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.60.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.60.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.60.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.61.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.61.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.61.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.62.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.62.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.62.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.63.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.63.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.63.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.64.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.64.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.64.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.65.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.65.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.65.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.66.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.66.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.66.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.67.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.67.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.67.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.68.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.68.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.68.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.69.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.69.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.69.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.7.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.7.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.7.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.70.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.70.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.70.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.71.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.71.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.71.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.72.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.72.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.72.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.73.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.73.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.73.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.74.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.74.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.74.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.75.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.75.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.75.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.76.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.76.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.76.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.77.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.77.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.77.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.78.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.78.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.78.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.79.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.79.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.79.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.experts.8.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.8.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.8.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.9.down_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.9.gate_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.experts.9.up_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.mlp.gate.e_score_correction_bias": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.gate.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.shared_experts.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.shared_experts.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.mlp.shared_experts.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00009-of-00075.safetensors", + "model.layers.11.self_attn.k_norm.weight": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.k_proj.bias": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.q_norm.weight": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.q_proj.bias": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.v_proj.bias": "model-00008-of-00075.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00008-of-00075.safetensors", + "model.layers.12.input_layernorm.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.0.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.0.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.0.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.1.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.1.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.1.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.10.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.10.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.10.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.11.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.11.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.11.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.12.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.12.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.12.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.13.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.13.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.13.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.14.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.14.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.14.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.15.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.15.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.15.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.16.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.16.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.16.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.17.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.17.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.17.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.18.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.18.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.18.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.19.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.19.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.19.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.2.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.2.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.2.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.20.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.20.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.20.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.21.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.21.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.21.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.22.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.22.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.22.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.23.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.23.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.23.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.24.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.24.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.24.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.25.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.25.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.25.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.26.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.26.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.26.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.27.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.27.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.27.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.28.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.28.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.28.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.29.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.29.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.29.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.3.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.3.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.3.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.30.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.30.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.30.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.31.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.31.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.31.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.32.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.32.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.32.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.33.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.33.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.33.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.34.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.34.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.34.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.35.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.35.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.35.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.36.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.36.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.36.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.37.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.37.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.37.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.38.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.38.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.38.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.39.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.39.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.39.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.4.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.4.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.4.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.40.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.40.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.40.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.41.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.41.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.41.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.42.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.42.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.42.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.43.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.43.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.43.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.44.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.44.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.44.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.45.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.45.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.45.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.46.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.46.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.46.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.47.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.47.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.47.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.48.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.48.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.48.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.49.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.49.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.49.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.5.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.5.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.5.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.50.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.50.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.50.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.51.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.51.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.51.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.52.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.52.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.52.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.53.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.53.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.53.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.54.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.54.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.54.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.55.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.55.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.55.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.56.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.56.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.56.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.57.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.57.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.57.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.58.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.58.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.58.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.59.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.59.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.59.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.6.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.6.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.6.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.60.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.60.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.60.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.61.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.61.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.61.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.62.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.62.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.62.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.63.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.63.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.63.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.64.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.64.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.64.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.65.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.65.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.65.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.66.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.66.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.66.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.67.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.67.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.67.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.68.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.68.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.68.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.69.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.69.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.69.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.7.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.7.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.7.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.70.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.70.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.70.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.71.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.71.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.71.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.72.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.72.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.72.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.73.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.73.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.73.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.74.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.74.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.74.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.75.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.75.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.75.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.76.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.76.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.76.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.77.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.77.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.77.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.78.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.78.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.78.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.79.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.79.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.79.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.8.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.8.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.8.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.9.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.9.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.experts.9.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.gate.e_score_correction_bias": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.gate.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.shared_experts.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.shared_experts.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.mlp.shared_experts.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.k_norm.weight": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.k_proj.bias": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.q_norm.weight": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.q_proj.bias": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.v_proj.bias": "model-00009-of-00075.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.input_layernorm.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.0.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.0.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.0.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.1.down_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.1.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.1.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.10.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.10.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.10.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.11.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.11.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.11.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.12.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.12.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.12.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.13.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.13.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.13.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.14.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.14.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.14.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.15.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.15.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.15.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.16.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.16.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.16.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.17.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.17.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.17.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.18.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.18.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.18.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.19.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.19.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.19.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.2.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.2.gate_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.2.up_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.mlp.experts.20.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.20.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.20.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.21.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.21.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.21.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.22.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.22.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.22.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.23.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.23.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.23.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.24.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.24.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.24.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.25.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.25.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.25.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.26.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.26.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.26.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.27.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.27.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.27.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.28.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.28.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.28.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.29.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.29.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.29.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.3.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.3.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.3.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.30.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.30.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.30.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.31.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.31.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.31.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.32.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.32.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.32.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.33.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.33.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.33.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.34.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.34.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.34.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.35.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.35.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.35.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.36.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.36.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.36.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.37.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.37.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.37.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.38.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.38.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.38.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.39.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.39.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.39.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.4.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.4.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.4.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.40.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.40.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.40.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.41.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.41.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.41.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.42.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.42.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.42.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.43.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.43.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.43.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.44.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.44.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.44.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.45.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.45.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.45.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.46.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.46.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.46.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.47.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.47.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.47.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.48.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.48.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.48.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.49.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.49.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.49.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.5.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.5.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.5.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.50.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.50.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.50.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.51.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.51.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.51.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.52.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.52.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.52.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.53.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.53.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.53.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.54.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.54.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.54.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.55.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.55.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.55.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.56.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.56.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.56.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.57.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.57.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.57.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.58.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.58.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.58.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.59.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.59.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.59.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.6.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.6.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.6.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.60.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.60.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.60.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.61.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.61.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.61.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.62.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.62.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.62.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.63.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.63.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.63.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.64.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.64.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.64.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.65.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.65.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.65.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.66.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.66.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.66.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.67.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.67.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.67.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.68.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.68.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.68.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.69.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.69.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.69.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.7.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.7.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.7.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.70.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.70.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.70.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.71.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.71.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.71.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.72.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.72.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.72.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.73.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.73.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.73.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.74.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.74.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.74.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.75.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.75.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.75.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.76.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.76.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.76.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.77.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.77.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.77.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.78.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.78.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.78.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.79.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.79.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.79.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.8.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.8.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.8.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.9.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.9.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.experts.9.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.gate.e_score_correction_bias": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.gate.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.shared_experts.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.shared_experts.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.mlp.shared_experts.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00010-of-00075.safetensors", + "model.layers.13.self_attn.k_norm.weight": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.k_proj.bias": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.q_norm.weight": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.q_proj.bias": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.v_proj.bias": "model-00009-of-00075.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00009-of-00075.safetensors", + "model.layers.14.input_layernorm.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.0.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.0.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.0.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.1.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.1.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.1.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.10.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.10.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.10.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.11.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.11.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.11.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.12.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.12.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.12.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.13.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.13.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.13.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.14.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.14.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.14.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.15.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.15.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.15.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.16.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.16.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.16.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.17.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.17.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.17.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.18.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.18.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.18.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.19.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.19.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.19.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.2.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.2.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.2.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.20.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.20.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.20.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.21.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.21.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.21.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.22.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.22.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.22.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.23.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.23.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.23.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.24.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.24.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.24.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.25.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.25.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.25.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.26.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.26.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.26.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.27.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.27.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.27.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.28.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.28.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.28.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.29.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.29.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.29.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.3.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.3.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.3.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.30.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.30.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.30.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.31.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.31.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.31.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.32.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.32.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.32.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.33.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.33.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.33.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.34.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.34.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.34.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.35.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.35.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.35.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.36.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.36.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.36.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.37.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.37.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.37.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.38.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.38.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.38.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.39.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.39.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.39.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.4.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.4.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.4.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.40.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.40.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.40.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.41.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.41.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.41.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.42.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.42.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.42.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.43.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.43.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.43.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.44.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.44.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.44.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.45.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.45.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.45.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.46.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.46.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.46.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.47.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.47.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.47.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.48.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.48.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.48.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.49.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.49.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.49.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.5.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.5.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.5.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.50.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.50.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.50.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.51.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.51.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.51.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.52.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.52.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.52.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.53.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.53.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.53.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.54.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.54.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.54.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.55.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.55.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.55.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.56.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.56.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.56.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.57.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.57.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.57.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.58.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.58.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.58.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.59.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.59.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.59.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.6.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.6.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.6.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.60.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.60.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.60.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.61.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.61.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.61.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.62.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.62.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.62.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.63.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.63.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.63.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.64.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.64.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.64.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.65.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.65.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.65.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.66.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.66.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.66.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.67.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.67.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.67.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.68.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.68.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.68.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.69.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.69.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.69.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.7.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.7.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.7.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.70.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.70.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.70.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.71.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.71.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.71.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.72.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.72.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.72.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.73.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.73.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.73.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.74.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.74.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.74.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.75.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.75.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.75.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.76.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.76.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.76.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.77.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.77.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.77.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.78.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.78.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.78.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.79.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.79.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.79.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.experts.8.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.8.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.8.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.9.down_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.9.gate_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.experts.9.up_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.mlp.gate.e_score_correction_bias": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.gate.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.shared_experts.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.shared_experts.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.mlp.shared_experts.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00011-of-00075.safetensors", + "model.layers.14.self_attn.k_norm.weight": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.k_proj.bias": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.q_norm.weight": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.q_proj.bias": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.v_proj.bias": "model-00010-of-00075.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00010-of-00075.safetensors", + "model.layers.15.input_layernorm.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.0.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.0.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.0.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.1.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.1.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.1.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.10.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.10.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.10.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.11.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.11.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.11.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.12.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.12.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.12.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.13.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.13.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.13.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.14.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.14.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.14.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.15.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.15.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.15.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.16.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.16.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.16.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.17.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.17.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.17.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.18.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.18.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.18.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.19.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.19.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.19.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.2.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.2.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.2.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.20.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.20.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.20.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.21.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.21.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.21.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.22.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.22.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.22.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.23.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.23.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.23.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.24.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.24.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.24.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.25.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.25.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.25.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.26.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.26.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.26.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.27.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.27.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.27.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.28.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.28.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.28.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.29.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.29.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.29.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.3.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.3.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.3.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.30.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.30.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.30.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.31.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.31.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.31.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.32.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.32.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.32.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.33.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.33.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.33.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.34.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.34.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.34.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.35.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.35.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.35.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.36.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.36.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.36.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.37.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.37.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.37.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.38.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.38.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.38.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.39.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.39.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.39.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.4.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.4.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.4.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.40.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.40.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.40.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.41.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.41.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.41.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.42.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.42.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.42.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.43.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.43.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.43.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.44.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.44.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.44.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.45.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.45.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.45.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.46.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.46.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.46.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.47.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.47.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.47.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.48.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.48.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.48.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.49.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.49.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.49.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.5.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.5.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.5.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.50.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.50.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.50.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.51.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.51.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.51.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.52.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.52.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.52.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.53.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.53.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.53.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.54.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.54.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.54.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.55.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.55.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.55.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.56.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.56.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.56.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.57.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.57.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.57.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.58.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.58.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.58.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.59.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.59.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.59.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.6.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.6.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.6.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.60.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.60.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.60.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.61.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.61.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.61.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.62.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.62.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.62.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.63.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.63.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.63.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.64.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.64.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.64.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.65.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.65.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.65.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.66.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.66.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.66.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.67.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.67.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.67.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.68.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.68.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.68.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.69.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.69.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.69.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.7.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.7.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.7.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.70.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.70.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.70.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.71.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.71.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.71.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.72.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.72.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.72.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.73.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.73.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.73.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.74.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.74.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.74.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.75.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.75.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.75.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.76.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.76.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.76.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.77.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.77.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.77.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.78.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.78.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.78.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.79.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.79.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.79.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.experts.8.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.8.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.8.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.9.down_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.9.gate_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.experts.9.up_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.mlp.gate.e_score_correction_bias": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.gate.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.shared_experts.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.shared_experts.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.mlp.shared_experts.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00012-of-00075.safetensors", + "model.layers.15.self_attn.k_norm.weight": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.k_proj.bias": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.q_norm.weight": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.q_proj.bias": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.v_proj.bias": "model-00011-of-00075.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00011-of-00075.safetensors", + "model.layers.16.input_layernorm.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.0.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.0.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.0.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.1.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.1.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.1.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.10.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.10.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.10.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.11.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.11.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.11.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.12.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.12.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.12.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.13.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.13.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.13.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.14.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.14.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.14.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.15.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.15.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.15.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.16.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.16.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.16.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.17.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.17.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.17.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.18.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.18.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.18.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.19.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.19.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.19.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.2.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.2.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.2.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.20.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.20.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.20.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.21.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.21.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.21.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.22.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.22.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.22.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.23.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.23.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.23.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.24.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.24.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.24.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.25.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.25.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.25.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.26.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.26.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.26.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.27.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.27.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.27.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.28.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.28.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.28.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.29.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.29.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.29.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.3.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.3.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.3.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.30.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.30.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.30.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.31.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.31.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.31.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.32.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.32.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.32.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.33.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.33.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.33.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.34.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.34.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.34.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.35.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.35.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.35.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.36.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.36.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.36.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.37.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.37.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.37.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.38.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.38.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.38.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.39.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.39.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.39.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.4.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.4.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.4.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.40.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.40.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.40.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.41.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.41.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.41.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.42.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.42.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.42.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.43.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.43.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.43.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.44.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.44.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.44.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.45.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.45.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.45.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.46.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.46.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.46.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.47.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.47.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.47.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.48.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.48.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.48.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.49.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.49.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.49.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.5.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.5.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.5.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.50.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.50.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.50.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.51.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.51.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.51.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.52.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.52.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.52.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.53.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.53.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.53.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.54.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.54.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.54.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.55.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.55.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.55.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.56.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.56.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.56.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.57.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.57.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.57.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.58.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.58.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.58.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.59.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.59.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.59.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.6.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.6.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.6.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.60.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.60.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.60.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.61.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.61.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.61.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.62.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.62.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.62.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.63.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.63.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.63.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.64.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.64.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.64.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.65.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.65.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.65.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.66.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.66.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.66.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.67.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.67.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.67.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.68.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.68.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.68.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.69.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.69.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.69.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.7.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.7.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.7.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.70.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.70.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.70.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.71.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.71.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.71.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.72.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.72.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.72.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.73.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.73.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.73.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.74.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.74.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.74.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.75.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.75.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.75.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.76.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.76.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.76.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.77.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.77.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.77.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.78.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.78.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.78.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.79.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.79.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.79.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.experts.8.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.8.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.8.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.9.down_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.9.gate_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.experts.9.up_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.mlp.gate.e_score_correction_bias": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.gate.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.shared_experts.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.shared_experts.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.mlp.shared_experts.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00013-of-00075.safetensors", + "model.layers.16.self_attn.k_norm.weight": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.k_proj.bias": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.q_norm.weight": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.q_proj.bias": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.v_proj.bias": "model-00012-of-00075.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00012-of-00075.safetensors", + "model.layers.17.input_layernorm.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.experts.0.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.0.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.0.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.1.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.1.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.1.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.10.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.10.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.10.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.11.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.11.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.11.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.12.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.12.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.12.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.13.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.13.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.13.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.14.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.14.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.14.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.15.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.15.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.15.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.16.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.16.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.16.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.17.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.17.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.17.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.18.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.18.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.18.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.19.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.19.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.19.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.2.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.2.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.2.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.20.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.20.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.20.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.21.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.21.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.21.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.22.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.22.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.22.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.23.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.23.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.23.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.24.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.24.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.24.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.25.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.25.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.25.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.26.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.26.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.26.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.27.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.27.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.27.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.28.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.28.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.28.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.29.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.29.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.29.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.3.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.3.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.3.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.30.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.30.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.30.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.31.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.31.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.31.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.32.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.32.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.32.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.33.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.33.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.33.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.34.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.34.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.34.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.35.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.35.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.35.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.36.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.36.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.36.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.37.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.37.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.37.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.38.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.38.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.38.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.39.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.39.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.39.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.4.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.4.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.4.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.40.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.40.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.40.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.41.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.41.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.41.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.42.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.42.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.42.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.43.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.43.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.43.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.44.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.44.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.44.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.45.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.45.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.45.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.46.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.46.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.46.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.47.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.47.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.47.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.48.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.48.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.48.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.49.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.49.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.49.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.5.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.5.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.5.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.50.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.50.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.50.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.51.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.51.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.51.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.52.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.52.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.52.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.53.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.53.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.53.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.54.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.54.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.54.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.55.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.55.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.55.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.56.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.56.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.56.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.57.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.57.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.57.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.58.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.58.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.58.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.59.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.59.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.59.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.6.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.6.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.6.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.60.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.60.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.60.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.61.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.61.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.61.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.62.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.62.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.62.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.63.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.63.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.63.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.64.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.64.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.64.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.65.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.65.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.65.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.66.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.66.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.66.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.67.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.67.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.67.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.68.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.68.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.68.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.69.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.69.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.69.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.7.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.7.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.7.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.70.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.70.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.70.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.71.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.71.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.71.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.72.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.72.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.72.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.73.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.73.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.73.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.74.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.74.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.74.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.75.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.75.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.75.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.76.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.76.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.76.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.77.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.77.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.77.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.78.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.experts.78.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.78.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.79.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.experts.79.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.experts.79.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.experts.8.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.8.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.8.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.9.down_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.9.gate_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.experts.9.up_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.mlp.gate.e_score_correction_bias": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.gate.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.shared_experts.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.shared_experts.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.mlp.shared_experts.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00014-of-00075.safetensors", + "model.layers.17.self_attn.k_norm.weight": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.k_proj.bias": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.q_norm.weight": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.q_proj.bias": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.v_proj.bias": "model-00013-of-00075.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00013-of-00075.safetensors", + "model.layers.18.input_layernorm.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.0.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.0.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.0.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.1.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.1.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.1.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.10.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.10.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.10.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.11.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.11.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.11.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.12.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.12.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.12.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.13.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.13.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.13.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.14.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.14.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.14.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.15.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.15.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.15.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.16.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.16.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.16.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.17.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.17.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.17.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.18.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.18.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.18.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.19.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.19.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.19.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.2.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.2.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.2.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.20.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.20.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.20.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.21.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.21.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.21.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.22.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.22.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.22.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.23.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.23.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.23.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.24.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.24.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.24.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.25.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.25.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.25.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.26.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.26.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.26.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.27.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.27.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.27.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.28.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.28.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.28.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.29.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.29.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.29.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.3.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.3.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.3.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.30.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.30.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.30.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.31.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.31.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.31.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.32.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.32.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.32.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.33.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.33.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.33.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.34.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.34.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.34.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.35.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.35.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.35.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.36.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.36.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.36.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.37.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.37.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.37.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.38.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.38.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.38.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.39.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.39.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.39.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.4.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.4.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.4.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.40.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.40.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.40.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.41.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.41.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.41.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.42.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.42.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.42.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.43.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.43.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.43.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.44.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.44.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.44.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.45.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.45.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.45.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.46.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.46.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.46.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.47.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.47.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.47.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.48.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.48.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.48.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.49.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.49.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.49.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.5.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.5.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.5.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.50.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.50.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.50.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.51.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.51.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.51.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.52.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.52.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.52.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.53.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.53.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.53.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.54.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.54.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.54.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.55.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.55.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.55.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.56.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.56.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.56.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.57.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.57.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.57.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.58.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.58.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.58.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.59.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.59.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.59.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.6.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.6.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.6.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.60.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.60.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.60.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.61.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.61.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.61.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.62.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.62.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.62.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.63.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.63.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.63.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.64.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.64.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.64.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.65.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.65.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.65.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.66.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.66.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.66.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.67.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.67.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.67.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.68.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.68.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.68.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.69.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.69.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.69.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.7.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.7.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.7.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.70.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.70.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.70.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.71.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.71.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.71.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.72.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.72.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.72.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.73.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.73.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.73.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.74.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.74.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.74.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.75.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.75.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.75.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.76.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.76.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.76.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.77.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.77.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.77.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.78.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.78.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.78.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.79.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.79.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.79.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.8.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.8.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.8.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.9.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.9.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.experts.9.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.gate.e_score_correction_bias": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.gate.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.shared_experts.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.shared_experts.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.mlp.shared_experts.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.k_norm.weight": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.k_proj.bias": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.q_norm.weight": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.q_proj.bias": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.v_proj.bias": "model-00014-of-00075.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.input_layernorm.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.0.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.0.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.0.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.1.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.1.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.1.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.10.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.10.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.10.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.11.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.11.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.11.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.12.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.12.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.12.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.13.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.13.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.13.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.14.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.14.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.14.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.15.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.15.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.15.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.16.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.16.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.16.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.17.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.17.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.17.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.18.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.18.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.18.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.19.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.19.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.19.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.2.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.2.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.2.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.20.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.20.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.20.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.21.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.21.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.21.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.22.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.22.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.22.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.23.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.23.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.23.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.24.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.24.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.24.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.25.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.25.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.25.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.26.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.26.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.26.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.27.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.27.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.27.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.28.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.28.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.28.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.29.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.29.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.29.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.3.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.3.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.3.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.30.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.30.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.30.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.31.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.31.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.31.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.32.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.32.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.32.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.33.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.33.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.33.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.34.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.34.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.34.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.35.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.35.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.35.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.36.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.36.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.36.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.37.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.37.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.37.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.38.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.38.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.38.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.39.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.39.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.39.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.4.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.4.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.4.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.40.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.40.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.40.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.41.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.41.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.41.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.42.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.42.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.42.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.43.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.43.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.43.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.44.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.44.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.44.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.45.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.45.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.45.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.46.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.46.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.46.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.47.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.47.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.47.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.48.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.48.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.48.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.49.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.49.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.49.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.5.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.5.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.5.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.50.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.50.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.50.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.51.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.51.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.51.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.52.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.52.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.52.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.53.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.53.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.53.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.54.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.54.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.54.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.55.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.55.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.55.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.56.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.56.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.56.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.57.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.57.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.57.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.58.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.58.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.58.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.59.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.59.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.59.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.6.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.6.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.6.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.60.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.60.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.60.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.61.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.61.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.61.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.62.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.62.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.62.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.63.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.63.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.63.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.64.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.64.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.64.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.65.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.65.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.65.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.66.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.66.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.66.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.67.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.67.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.67.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.68.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.68.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.68.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.69.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.69.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.69.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.7.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.7.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.7.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.70.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.70.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.70.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.71.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.71.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.71.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.72.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.72.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.72.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.73.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.73.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.73.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.74.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.74.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.74.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.75.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.75.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.75.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.76.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.76.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.76.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.77.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.77.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.77.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.78.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.78.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.78.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.79.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.79.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.79.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.experts.8.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.8.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.8.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.9.down_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.9.gate_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.experts.9.up_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.mlp.gate.e_score_correction_bias": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.gate.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.shared_experts.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.shared_experts.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.mlp.shared_experts.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00015-of-00075.safetensors", + "model.layers.19.self_attn.k_norm.weight": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.k_proj.bias": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.q_norm.weight": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.q_proj.bias": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.v_proj.bias": "model-00014-of-00075.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00014-of-00075.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00075.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.k_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.q_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.20.input_layernorm.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.0.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.0.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.0.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.1.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.1.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.1.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.10.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.10.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.10.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.11.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.11.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.11.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.12.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.12.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.12.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.13.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.13.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.13.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.14.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.14.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.14.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.15.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.15.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.15.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.16.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.16.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.16.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.17.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.17.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.17.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.18.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.18.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.18.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.19.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.19.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.19.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.2.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.2.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.2.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.20.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.20.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.20.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.21.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.21.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.21.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.22.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.22.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.22.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.23.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.23.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.23.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.24.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.24.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.24.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.25.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.25.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.25.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.26.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.26.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.26.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.27.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.27.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.27.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.28.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.28.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.28.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.29.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.29.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.29.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.3.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.3.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.3.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.30.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.30.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.30.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.31.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.31.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.31.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.32.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.32.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.32.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.33.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.33.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.33.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.34.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.34.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.34.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.35.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.35.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.35.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.36.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.36.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.36.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.37.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.37.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.37.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.38.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.38.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.38.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.39.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.39.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.39.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.4.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.4.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.4.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.40.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.40.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.40.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.41.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.41.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.41.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.42.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.42.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.42.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.43.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.43.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.43.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.44.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.44.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.44.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.45.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.45.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.45.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.46.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.46.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.46.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.47.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.47.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.47.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.48.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.48.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.48.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.49.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.49.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.49.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.5.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.5.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.5.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.50.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.50.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.50.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.51.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.51.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.51.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.52.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.52.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.52.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.53.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.53.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.53.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.54.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.54.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.54.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.55.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.55.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.55.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.56.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.56.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.56.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.57.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.57.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.57.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.58.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.58.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.58.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.59.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.59.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.59.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.6.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.6.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.6.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.60.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.60.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.60.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.61.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.61.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.61.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.62.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.62.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.62.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.63.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.63.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.63.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.64.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.64.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.64.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.65.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.65.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.65.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.66.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.66.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.66.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.67.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.67.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.67.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.68.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.68.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.68.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.69.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.69.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.69.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.7.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.7.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.7.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.70.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.70.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.70.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.71.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.71.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.71.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.72.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.72.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.72.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.73.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.73.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.73.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.74.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.74.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.74.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.75.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.75.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.75.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.76.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.76.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.76.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.77.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.77.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.77.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.78.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.78.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.78.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.79.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.79.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.79.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.experts.8.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.8.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.8.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.9.down_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.9.gate_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.experts.9.up_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.mlp.gate.e_score_correction_bias": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.gate.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.shared_experts.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.shared_experts.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.mlp.shared_experts.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00016-of-00075.safetensors", + "model.layers.20.self_attn.k_norm.weight": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.k_proj.bias": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.q_norm.weight": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.q_proj.bias": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.v_proj.bias": "model-00015-of-00075.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00015-of-00075.safetensors", + "model.layers.21.input_layernorm.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.0.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.0.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.0.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.1.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.1.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.1.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.10.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.10.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.10.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.11.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.11.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.11.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.12.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.12.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.12.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.13.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.13.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.13.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.14.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.14.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.14.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.15.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.15.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.15.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.16.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.16.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.16.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.17.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.17.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.17.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.18.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.18.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.18.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.19.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.19.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.19.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.2.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.2.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.2.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.20.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.20.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.20.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.21.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.21.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.21.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.22.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.22.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.22.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.23.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.23.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.23.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.24.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.24.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.24.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.25.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.25.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.25.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.26.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.26.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.26.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.27.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.27.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.27.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.28.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.28.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.28.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.29.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.29.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.29.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.3.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.3.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.3.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.30.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.30.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.30.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.31.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.31.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.31.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.32.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.32.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.32.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.33.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.33.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.33.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.34.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.34.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.34.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.35.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.35.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.35.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.36.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.36.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.36.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.37.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.37.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.37.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.38.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.38.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.38.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.39.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.39.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.39.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.4.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.4.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.4.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.40.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.40.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.40.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.41.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.41.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.41.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.42.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.42.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.42.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.43.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.43.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.43.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.44.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.44.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.44.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.45.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.45.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.45.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.46.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.46.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.46.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.47.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.47.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.47.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.48.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.48.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.48.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.49.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.49.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.49.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.5.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.5.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.5.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.50.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.50.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.50.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.51.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.51.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.51.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.52.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.52.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.52.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.53.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.53.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.53.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.54.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.54.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.54.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.55.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.55.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.55.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.56.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.56.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.56.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.57.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.57.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.57.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.58.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.58.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.58.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.59.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.59.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.59.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.6.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.6.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.6.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.60.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.60.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.60.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.61.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.61.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.61.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.62.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.62.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.62.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.63.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.63.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.63.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.64.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.64.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.64.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.65.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.65.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.65.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.66.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.66.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.66.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.67.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.67.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.67.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.68.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.68.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.68.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.69.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.69.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.69.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.7.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.7.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.7.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.70.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.70.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.70.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.71.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.71.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.71.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.72.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.72.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.72.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.73.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.73.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.73.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.74.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.74.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.74.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.75.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.75.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.75.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.76.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.76.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.76.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.77.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.77.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.77.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.78.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.78.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.78.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.79.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.79.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.79.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.experts.8.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.8.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.8.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.9.down_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.9.gate_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.experts.9.up_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.mlp.gate.e_score_correction_bias": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.gate.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.shared_experts.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.shared_experts.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.mlp.shared_experts.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00017-of-00075.safetensors", + "model.layers.21.self_attn.k_norm.weight": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.k_proj.bias": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.q_norm.weight": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.q_proj.bias": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.v_proj.bias": "model-00016-of-00075.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00016-of-00075.safetensors", + "model.layers.22.input_layernorm.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.0.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.0.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.0.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.1.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.1.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.1.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.10.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.10.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.10.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.11.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.11.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.11.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.12.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.12.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.12.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.13.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.13.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.13.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.14.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.14.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.14.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.15.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.15.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.15.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.16.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.16.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.16.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.17.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.17.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.17.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.18.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.18.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.18.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.19.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.19.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.19.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.2.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.2.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.2.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.20.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.20.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.20.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.21.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.21.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.21.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.22.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.22.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.22.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.23.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.23.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.23.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.24.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.24.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.24.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.25.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.25.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.25.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.26.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.26.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.26.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.27.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.27.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.27.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.28.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.28.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.28.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.29.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.29.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.29.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.3.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.3.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.3.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.30.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.30.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.30.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.31.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.31.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.31.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.32.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.32.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.32.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.33.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.33.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.33.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.34.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.34.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.34.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.35.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.35.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.35.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.36.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.36.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.36.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.37.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.37.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.37.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.38.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.38.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.38.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.39.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.39.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.39.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.4.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.4.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.4.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.40.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.40.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.40.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.41.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.41.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.41.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.42.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.42.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.42.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.43.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.43.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.43.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.44.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.44.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.44.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.45.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.45.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.45.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.46.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.46.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.46.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.47.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.47.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.47.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.48.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.48.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.48.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.49.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.49.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.49.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.5.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.5.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.5.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.50.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.50.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.50.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.51.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.51.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.51.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.52.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.52.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.52.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.53.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.53.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.53.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.54.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.54.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.54.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.55.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.55.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.55.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.56.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.56.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.56.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.57.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.57.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.57.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.58.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.58.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.58.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.59.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.59.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.59.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.6.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.6.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.6.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.60.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.60.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.60.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.61.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.61.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.61.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.62.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.62.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.62.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.63.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.63.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.63.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.64.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.64.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.64.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.65.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.65.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.65.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.66.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.66.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.66.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.67.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.67.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.67.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.68.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.68.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.68.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.69.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.69.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.69.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.7.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.7.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.7.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.70.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.70.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.70.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.71.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.71.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.71.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.72.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.72.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.72.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.73.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.73.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.73.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.74.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.74.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.74.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.75.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.75.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.75.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.76.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.76.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.76.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.77.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.77.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.77.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.78.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.78.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.78.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.79.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.79.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.79.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.experts.8.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.8.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.8.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.9.down_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.9.gate_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.experts.9.up_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.mlp.gate.e_score_correction_bias": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.gate.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.shared_experts.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.shared_experts.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.mlp.shared_experts.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00018-of-00075.safetensors", + "model.layers.22.self_attn.k_norm.weight": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.k_proj.bias": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.q_norm.weight": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.q_proj.bias": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.v_proj.bias": "model-00017-of-00075.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00017-of-00075.safetensors", + "model.layers.23.input_layernorm.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.0.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.0.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.0.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.1.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.1.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.1.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.10.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.10.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.10.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.11.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.11.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.11.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.12.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.12.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.12.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.13.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.13.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.13.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.14.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.14.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.14.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.15.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.15.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.15.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.16.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.16.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.16.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.17.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.17.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.17.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.18.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.18.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.18.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.19.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.19.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.19.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.2.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.2.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.2.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.20.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.20.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.20.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.21.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.21.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.21.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.22.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.22.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.22.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.23.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.23.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.23.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.24.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.24.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.24.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.25.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.25.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.25.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.26.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.26.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.26.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.27.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.27.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.27.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.28.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.28.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.28.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.29.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.29.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.29.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.3.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.3.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.3.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.30.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.30.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.30.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.31.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.31.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.31.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.32.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.32.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.32.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.33.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.33.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.33.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.34.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.34.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.34.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.35.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.35.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.35.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.36.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.36.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.36.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.37.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.37.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.37.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.38.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.38.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.38.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.39.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.39.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.39.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.4.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.4.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.4.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.40.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.40.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.40.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.41.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.41.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.41.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.42.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.42.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.42.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.43.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.43.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.43.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.44.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.44.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.44.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.45.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.45.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.45.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.46.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.46.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.46.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.47.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.47.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.47.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.48.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.48.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.48.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.49.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.49.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.49.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.5.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.5.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.5.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.50.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.50.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.50.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.51.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.51.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.51.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.52.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.52.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.52.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.53.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.53.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.53.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.54.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.54.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.54.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.55.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.55.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.55.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.56.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.56.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.56.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.57.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.57.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.57.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.58.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.58.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.58.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.59.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.59.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.59.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.6.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.6.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.6.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.60.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.60.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.60.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.61.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.61.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.61.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.62.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.62.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.62.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.63.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.63.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.63.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.64.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.64.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.64.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.65.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.65.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.65.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.66.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.66.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.66.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.67.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.67.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.67.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.68.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.68.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.68.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.69.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.69.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.69.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.7.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.7.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.7.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.70.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.70.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.70.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.71.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.71.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.71.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.72.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.72.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.72.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.73.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.73.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.73.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.74.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.74.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.74.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.75.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.75.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.75.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.76.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.76.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.76.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.77.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.77.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.77.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.78.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.78.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.78.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.79.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.79.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.79.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.8.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.8.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.8.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.9.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.9.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.experts.9.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.gate.e_score_correction_bias": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.gate.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.shared_experts.down_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.shared_experts.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.mlp.shared_experts.up_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.k_norm.weight": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.k_proj.bias": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.q_norm.weight": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.q_proj.bias": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.v_proj.bias": "model-00018-of-00075.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.24.input_layernorm.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.0.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.0.gate_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.24.mlp.experts.0.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.1.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.1.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.1.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.10.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.10.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.10.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.11.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.11.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.11.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.12.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.12.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.12.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.13.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.13.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.13.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.14.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.14.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.14.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.15.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.15.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.15.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.16.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.16.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.16.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.17.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.17.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.17.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.18.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.18.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.18.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.19.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.19.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.19.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.2.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.2.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.2.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.20.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.20.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.20.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.21.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.21.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.21.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.22.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.22.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.22.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.23.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.23.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.23.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.24.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.24.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.24.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.25.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.25.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.25.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.26.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.26.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.26.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.27.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.27.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.27.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.28.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.28.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.28.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.29.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.29.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.29.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.3.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.3.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.3.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.30.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.30.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.30.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.31.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.31.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.31.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.32.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.32.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.32.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.33.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.33.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.33.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.34.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.34.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.34.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.35.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.35.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.35.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.36.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.36.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.36.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.37.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.37.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.37.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.38.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.38.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.38.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.39.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.39.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.39.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.4.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.4.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.4.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.40.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.40.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.40.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.41.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.41.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.41.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.42.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.42.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.42.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.43.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.43.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.43.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.44.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.44.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.44.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.45.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.45.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.45.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.46.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.46.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.46.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.47.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.47.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.47.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.48.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.48.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.48.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.49.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.49.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.49.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.5.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.5.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.5.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.50.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.50.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.50.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.51.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.51.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.51.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.52.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.52.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.52.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.53.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.53.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.53.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.54.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.54.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.54.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.55.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.55.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.55.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.56.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.56.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.56.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.57.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.57.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.57.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.58.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.58.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.58.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.59.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.59.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.59.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.6.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.6.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.6.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.60.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.60.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.60.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.61.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.61.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.61.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.62.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.62.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.62.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.63.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.63.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.63.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.64.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.64.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.64.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.65.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.65.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.65.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.66.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.66.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.66.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.67.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.67.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.67.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.68.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.68.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.68.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.69.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.69.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.69.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.7.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.7.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.7.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.70.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.70.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.70.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.71.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.71.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.71.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.72.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.72.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.72.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.73.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.73.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.73.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.74.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.74.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.74.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.75.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.75.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.75.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.76.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.76.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.76.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.77.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.77.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.77.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.78.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.78.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.78.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.79.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.79.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.79.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.8.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.8.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.8.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.9.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.9.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.experts.9.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.gate.e_score_correction_bias": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.gate.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.shared_experts.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.shared_experts.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.mlp.shared_experts.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00019-of-00075.safetensors", + "model.layers.24.self_attn.k_norm.weight": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.k_proj.bias": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.q_norm.weight": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.q_proj.bias": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.v_proj.bias": "model-00018-of-00075.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00018-of-00075.safetensors", + "model.layers.25.input_layernorm.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.0.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.0.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.0.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.1.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.1.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.1.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.10.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.10.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.10.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.11.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.11.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.11.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.12.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.12.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.12.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.13.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.13.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.13.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.14.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.14.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.14.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.15.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.15.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.15.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.16.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.16.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.16.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.17.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.17.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.17.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.18.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.18.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.18.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.19.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.19.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.19.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.2.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.2.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.2.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.20.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.20.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.20.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.21.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.21.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.21.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.22.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.22.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.22.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.23.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.23.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.23.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.24.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.24.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.24.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.25.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.25.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.25.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.26.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.26.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.26.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.27.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.27.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.27.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.28.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.28.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.28.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.29.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.29.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.29.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.3.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.3.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.3.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.30.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.30.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.30.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.31.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.31.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.31.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.32.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.32.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.32.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.33.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.33.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.33.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.34.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.34.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.34.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.35.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.35.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.35.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.36.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.36.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.36.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.37.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.37.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.37.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.38.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.38.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.38.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.39.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.39.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.39.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.4.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.4.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.4.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.40.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.40.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.40.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.41.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.41.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.41.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.42.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.42.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.42.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.43.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.43.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.43.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.44.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.44.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.44.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.45.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.45.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.45.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.46.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.46.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.46.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.47.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.47.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.47.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.48.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.48.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.48.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.49.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.49.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.49.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.5.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.5.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.5.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.50.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.50.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.50.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.51.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.51.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.51.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.52.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.52.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.52.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.53.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.53.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.53.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.54.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.54.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.54.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.55.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.55.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.55.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.56.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.56.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.56.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.57.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.57.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.57.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.58.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.58.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.58.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.59.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.59.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.59.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.6.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.6.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.6.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.60.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.60.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.60.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.61.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.61.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.61.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.62.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.62.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.62.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.63.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.63.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.63.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.64.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.64.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.64.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.65.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.65.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.65.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.66.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.66.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.66.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.67.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.67.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.67.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.68.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.68.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.68.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.69.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.69.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.69.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.7.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.7.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.7.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.70.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.70.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.70.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.71.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.71.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.71.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.72.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.72.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.72.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.73.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.73.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.73.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.74.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.74.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.74.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.75.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.75.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.75.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.76.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.76.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.76.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.77.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.77.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.77.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.78.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.78.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.78.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.79.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.79.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.79.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.experts.8.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.8.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.8.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.9.down_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.9.gate_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.experts.9.up_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.mlp.gate.e_score_correction_bias": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.gate.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.shared_experts.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.shared_experts.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.mlp.shared_experts.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00020-of-00075.safetensors", + "model.layers.25.self_attn.k_norm.weight": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.k_proj.bias": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.q_norm.weight": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.q_proj.bias": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.v_proj.bias": "model-00019-of-00075.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00019-of-00075.safetensors", + "model.layers.26.input_layernorm.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.0.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.0.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.0.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.1.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.1.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.1.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.10.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.10.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.10.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.11.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.11.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.11.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.12.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.12.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.12.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.13.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.13.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.13.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.14.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.14.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.14.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.15.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.15.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.15.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.16.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.16.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.16.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.17.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.17.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.17.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.18.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.18.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.18.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.19.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.19.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.19.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.2.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.2.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.2.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.20.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.20.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.20.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.21.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.21.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.21.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.22.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.22.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.22.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.23.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.23.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.23.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.24.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.24.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.24.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.25.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.25.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.25.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.26.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.26.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.26.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.27.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.27.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.27.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.28.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.28.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.28.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.29.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.29.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.29.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.3.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.3.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.3.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.30.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.30.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.30.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.31.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.31.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.31.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.32.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.32.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.32.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.33.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.33.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.33.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.34.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.34.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.34.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.35.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.35.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.35.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.36.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.36.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.36.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.37.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.37.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.37.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.38.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.38.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.38.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.39.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.39.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.39.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.4.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.4.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.4.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.40.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.40.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.40.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.41.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.41.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.41.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.42.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.42.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.42.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.43.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.43.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.43.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.44.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.44.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.44.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.45.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.45.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.45.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.46.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.46.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.46.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.47.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.47.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.47.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.48.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.48.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.48.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.49.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.49.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.49.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.5.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.5.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.5.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.50.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.50.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.50.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.51.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.51.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.51.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.52.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.52.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.52.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.53.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.53.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.53.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.54.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.54.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.54.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.55.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.55.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.55.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.56.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.56.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.56.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.57.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.57.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.57.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.58.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.58.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.58.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.59.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.59.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.59.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.6.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.6.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.6.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.60.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.60.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.60.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.61.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.61.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.61.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.62.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.62.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.62.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.63.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.63.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.63.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.64.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.64.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.64.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.65.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.65.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.65.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.66.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.66.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.66.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.67.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.67.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.67.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.68.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.68.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.68.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.69.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.69.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.69.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.7.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.7.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.7.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.70.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.70.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.70.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.71.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.71.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.71.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.72.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.72.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.72.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.73.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.73.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.73.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.74.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.74.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.74.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.75.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.75.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.75.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.76.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.76.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.76.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.77.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.77.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.77.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.78.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.78.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.78.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.79.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.79.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.79.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.experts.8.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.8.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.8.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.9.down_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.9.gate_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.experts.9.up_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.mlp.gate.e_score_correction_bias": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.gate.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.shared_experts.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.shared_experts.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.mlp.shared_experts.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00021-of-00075.safetensors", + "model.layers.26.self_attn.k_norm.weight": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.k_proj.bias": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.q_norm.weight": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.q_proj.bias": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.v_proj.bias": "model-00020-of-00075.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00020-of-00075.safetensors", + "model.layers.27.input_layernorm.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.0.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.0.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.0.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.1.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.1.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.1.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.10.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.10.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.10.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.11.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.11.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.11.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.12.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.12.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.12.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.13.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.13.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.13.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.14.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.14.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.14.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.15.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.15.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.15.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.16.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.16.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.16.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.17.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.17.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.17.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.18.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.18.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.18.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.19.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.19.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.19.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.2.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.2.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.2.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.20.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.20.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.20.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.21.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.21.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.21.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.22.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.22.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.22.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.23.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.23.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.23.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.24.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.24.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.24.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.25.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.25.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.25.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.26.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.26.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.26.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.27.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.27.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.27.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.28.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.28.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.28.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.29.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.29.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.29.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.3.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.3.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.3.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.30.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.30.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.30.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.31.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.31.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.31.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.32.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.32.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.32.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.33.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.33.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.33.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.34.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.34.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.34.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.35.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.35.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.35.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.36.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.36.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.36.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.37.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.37.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.37.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.38.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.38.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.38.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.39.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.39.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.39.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.4.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.4.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.4.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.40.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.40.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.40.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.41.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.41.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.41.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.42.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.42.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.42.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.43.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.43.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.43.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.44.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.44.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.44.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.45.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.45.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.45.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.46.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.46.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.46.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.47.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.47.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.47.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.48.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.48.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.48.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.49.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.49.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.49.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.5.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.5.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.5.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.50.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.50.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.50.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.51.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.51.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.51.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.52.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.52.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.52.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.53.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.53.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.53.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.54.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.54.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.54.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.55.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.55.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.55.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.56.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.56.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.56.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.57.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.57.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.57.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.58.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.58.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.58.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.59.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.59.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.59.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.6.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.6.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.6.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.60.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.60.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.60.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.61.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.61.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.61.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.62.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.62.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.62.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.63.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.63.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.63.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.64.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.64.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.64.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.65.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.65.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.65.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.66.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.66.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.66.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.67.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.67.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.67.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.68.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.68.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.68.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.69.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.69.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.69.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.7.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.7.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.7.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.70.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.70.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.70.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.71.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.71.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.71.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.72.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.72.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.72.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.73.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.73.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.73.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.74.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.74.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.74.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.75.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.75.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.75.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.76.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.76.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.76.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.77.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.77.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.77.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.78.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.78.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.78.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.79.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.79.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.79.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.experts.8.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.8.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.8.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.9.down_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.9.gate_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.experts.9.up_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.mlp.gate.e_score_correction_bias": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.gate.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.shared_experts.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.shared_experts.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.mlp.shared_experts.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00022-of-00075.safetensors", + "model.layers.27.self_attn.k_norm.weight": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.k_proj.bias": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.q_norm.weight": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.q_proj.bias": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.v_proj.bias": "model-00021-of-00075.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00021-of-00075.safetensors", + "model.layers.28.input_layernorm.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.0.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.0.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.0.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.1.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.1.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.1.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.10.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.10.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.10.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.11.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.11.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.11.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.12.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.12.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.12.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.13.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.13.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.13.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.14.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.14.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.14.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.15.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.15.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.15.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.16.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.16.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.16.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.17.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.17.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.17.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.18.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.18.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.18.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.19.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.19.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.19.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.2.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.2.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.2.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.20.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.20.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.20.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.21.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.21.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.21.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.22.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.22.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.22.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.23.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.23.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.23.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.24.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.24.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.24.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.25.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.25.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.25.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.26.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.26.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.26.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.27.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.27.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.27.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.28.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.28.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.28.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.29.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.29.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.29.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.3.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.3.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.3.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.30.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.30.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.30.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.31.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.31.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.31.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.32.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.32.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.32.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.33.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.33.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.33.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.34.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.34.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.34.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.35.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.35.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.35.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.36.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.36.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.36.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.37.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.37.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.37.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.38.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.38.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.38.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.39.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.39.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.39.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.4.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.4.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.4.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.40.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.40.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.40.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.41.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.41.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.41.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.42.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.42.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.42.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.43.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.43.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.43.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.44.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.44.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.44.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.45.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.45.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.45.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.46.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.46.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.46.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.47.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.47.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.47.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.48.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.48.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.48.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.49.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.49.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.49.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.5.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.5.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.5.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.50.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.50.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.50.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.51.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.51.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.51.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.52.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.52.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.52.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.53.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.53.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.53.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.54.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.54.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.54.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.55.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.55.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.55.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.56.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.56.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.56.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.57.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.57.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.57.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.58.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.58.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.58.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.59.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.59.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.59.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.6.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.6.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.6.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.60.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.60.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.60.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.61.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.61.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.61.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.62.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.62.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.62.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.63.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.63.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.63.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.64.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.64.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.64.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.65.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.65.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.65.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.66.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.66.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.66.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.67.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.67.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.67.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.68.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.68.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.68.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.69.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.69.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.69.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.7.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.7.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.7.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.70.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.70.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.70.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.71.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.71.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.71.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.72.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.72.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.72.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.73.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.73.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.73.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.74.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.74.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.74.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.75.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.75.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.75.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.76.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.76.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.76.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.77.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.77.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.77.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.78.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.78.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.78.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.79.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.79.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.79.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.experts.8.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.8.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.8.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.9.down_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.9.gate_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.experts.9.up_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.mlp.gate.e_score_correction_bias": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.gate.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.shared_experts.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.shared_experts.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.mlp.shared_experts.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00023-of-00075.safetensors", + "model.layers.28.self_attn.k_norm.weight": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.k_proj.bias": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.q_norm.weight": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.q_proj.bias": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.v_proj.bias": "model-00022-of-00075.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00022-of-00075.safetensors", + "model.layers.29.input_layernorm.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.0.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.0.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.0.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.1.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.1.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.1.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.10.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.10.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.10.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.11.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.11.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.11.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.12.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.12.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.12.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.13.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.13.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.13.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.14.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.14.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.14.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.15.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.15.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.15.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.16.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.16.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.16.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.17.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.17.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.17.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.18.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.18.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.18.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.19.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.19.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.19.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.2.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.2.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.2.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.20.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.20.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.20.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.21.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.21.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.21.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.22.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.22.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.22.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.23.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.23.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.23.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.24.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.24.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.24.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.25.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.25.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.25.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.26.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.26.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.26.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.27.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.27.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.27.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.28.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.28.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.28.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.29.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.29.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.29.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.3.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.3.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.3.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.30.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.30.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.30.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.31.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.31.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.31.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.32.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.32.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.32.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.33.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.33.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.33.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.34.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.34.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.34.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.35.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.35.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.35.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.36.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.36.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.36.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.37.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.37.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.37.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.38.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.38.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.38.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.39.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.39.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.39.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.4.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.4.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.4.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.40.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.40.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.40.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.41.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.41.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.41.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.42.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.42.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.42.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.43.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.43.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.43.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.44.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.44.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.44.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.45.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.45.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.45.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.46.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.46.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.46.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.47.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.47.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.47.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.48.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.48.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.48.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.49.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.49.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.49.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.5.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.5.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.5.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.50.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.50.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.50.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.51.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.51.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.51.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.52.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.52.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.52.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.53.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.53.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.53.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.54.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.54.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.54.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.55.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.55.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.55.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.56.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.56.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.56.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.57.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.57.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.57.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.58.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.58.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.58.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.59.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.59.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.59.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.6.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.6.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.6.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.60.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.60.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.60.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.61.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.61.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.61.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.62.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.62.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.62.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.63.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.63.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.63.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.64.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.64.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.64.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.65.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.65.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.65.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.66.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.66.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.66.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.67.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.67.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.67.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.68.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.68.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.68.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.69.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.69.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.69.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.7.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.7.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.7.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.70.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.70.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.70.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.71.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.71.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.71.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.72.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.72.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.72.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.73.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.73.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.73.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.74.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.74.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.74.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.75.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.75.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.75.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.76.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.76.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.76.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.77.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.77.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.77.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.78.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.78.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.78.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.79.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.79.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.79.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.8.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.8.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.8.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.9.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.9.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.experts.9.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.gate.e_score_correction_bias": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.gate.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.shared_experts.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.shared_experts.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.mlp.shared_experts.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.k_norm.weight": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.k_proj.bias": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.q_norm.weight": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.q_proj.bias": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.v_proj.bias": "model-00023-of-00075.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.0.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.0.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.0.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.1.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.1.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.1.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.10.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.10.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.10.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.11.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.11.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.11.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.12.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.12.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.12.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.13.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.13.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.13.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.14.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.14.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.14.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.15.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.15.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.15.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.16.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.16.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.16.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.17.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.17.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.17.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.18.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.18.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.18.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.19.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.19.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.19.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.2.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.2.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.2.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.20.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.20.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.20.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.21.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.21.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.21.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.22.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.22.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.22.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.23.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.23.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.23.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.24.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.24.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.24.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.25.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.25.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.25.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.26.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.26.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.26.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.27.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.27.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.27.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.28.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.28.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.28.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.29.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.29.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.29.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.3.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.3.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.3.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.30.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.30.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.30.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.31.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.31.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.31.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.32.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.32.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.32.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.33.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.33.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.33.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.34.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.34.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.34.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.35.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.35.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.35.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.36.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.36.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.36.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.37.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.37.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.37.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.38.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.38.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.38.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.39.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.39.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.39.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.4.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.4.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.4.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.40.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.40.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.40.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.41.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.41.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.41.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.42.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.42.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.42.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.43.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.43.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.43.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.44.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.44.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.44.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.45.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.45.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.45.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.46.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.46.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.46.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.47.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.47.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.47.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.48.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.48.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.48.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.49.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.49.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.49.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.5.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.5.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.5.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.50.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.50.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.50.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.51.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.51.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.51.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.52.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.52.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.52.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.53.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.53.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.53.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.54.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.54.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.54.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.55.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.55.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.55.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.56.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.56.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.56.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.57.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.57.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.57.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.58.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.58.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.58.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.59.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.59.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.59.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.6.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.6.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.6.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.60.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.60.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.60.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.61.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.61.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.61.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.62.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.62.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.62.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.63.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.63.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.63.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.64.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.64.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.64.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.65.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.65.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.65.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.66.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.66.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.66.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.67.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.67.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.67.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.68.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.68.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.68.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.69.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.69.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.69.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.7.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.7.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.7.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.70.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.70.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.70.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.71.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.71.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.71.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.72.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.72.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.72.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.73.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.73.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.73.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.74.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.74.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.74.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.75.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.75.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.75.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.76.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.76.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.76.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.77.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.77.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.77.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.78.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.78.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.78.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.79.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.79.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.79.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.experts.8.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.8.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.8.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.9.down_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.9.gate_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.experts.9.up_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.mlp.gate.e_score_correction_bias": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.gate.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.shared_experts.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.shared_experts.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.mlp.shared_experts.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00075.safetensors", + "model.layers.3.self_attn.k_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.q_norm.weight": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00075.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00075.safetensors", + "model.layers.30.input_layernorm.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.0.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.0.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.0.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.1.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.1.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.1.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.10.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.10.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.10.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.11.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.11.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.11.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.12.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.12.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.12.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.13.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.13.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.13.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.14.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.14.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.14.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.15.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.15.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.15.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.16.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.16.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.16.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.17.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.17.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.17.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.18.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.18.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.18.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.19.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.19.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.19.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.2.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.2.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.2.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.20.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.20.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.20.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.21.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.21.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.21.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.22.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.22.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.22.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.23.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.23.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.23.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.24.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.24.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.24.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.25.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.25.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.25.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.26.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.26.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.26.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.27.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.27.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.27.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.28.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.28.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.28.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.29.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.29.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.29.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.3.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.3.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.3.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.30.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.30.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.30.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.31.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.31.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.31.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.32.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.32.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.32.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.33.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.33.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.33.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.34.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.34.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.34.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.35.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.35.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.35.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.36.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.36.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.36.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.37.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.37.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.37.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.38.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.38.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.38.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.39.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.39.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.39.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.4.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.4.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.4.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.40.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.40.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.40.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.41.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.41.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.41.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.42.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.42.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.42.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.43.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.43.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.43.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.44.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.44.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.44.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.45.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.45.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.45.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.46.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.46.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.46.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.47.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.47.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.47.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.48.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.48.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.48.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.49.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.49.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.49.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.5.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.5.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.5.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.50.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.50.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.50.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.51.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.51.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.51.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.52.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.52.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.52.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.53.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.53.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.53.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.54.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.54.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.54.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.55.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.55.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.55.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.56.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.56.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.56.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.57.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.57.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.57.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.58.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.58.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.58.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.59.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.59.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.59.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.6.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.6.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.6.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.60.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.60.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.60.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.61.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.61.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.61.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.62.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.62.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.62.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.63.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.63.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.63.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.64.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.64.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.64.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.65.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.65.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.65.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.66.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.66.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.66.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.67.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.67.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.67.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.68.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.68.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.68.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.69.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.69.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.69.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.7.down_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.7.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.7.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.70.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.70.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.70.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.71.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.71.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.71.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.72.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.72.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.72.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.73.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.73.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.73.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.74.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.74.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.74.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.75.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.75.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.75.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.76.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.76.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.76.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.77.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.77.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.77.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.78.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.78.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.78.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.79.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.79.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.79.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.8.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.8.gate_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.8.up_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.mlp.experts.9.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.9.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.experts.9.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.gate.e_score_correction_bias": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.gate.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.shared_experts.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.shared_experts.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.mlp.shared_experts.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00024-of-00075.safetensors", + "model.layers.30.self_attn.k_norm.weight": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.k_proj.bias": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.q_norm.weight": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.q_proj.bias": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.v_proj.bias": "model-00023-of-00075.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00023-of-00075.safetensors", + "model.layers.31.input_layernorm.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.0.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.0.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.0.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.1.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.1.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.1.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.10.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.10.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.10.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.11.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.11.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.11.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.12.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.12.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.12.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.13.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.13.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.13.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.14.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.14.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.14.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.15.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.15.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.15.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.16.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.16.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.16.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.17.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.17.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.17.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.18.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.18.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.18.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.19.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.19.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.19.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.2.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.2.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.2.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.20.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.20.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.20.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.21.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.21.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.21.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.22.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.22.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.22.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.23.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.23.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.23.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.24.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.24.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.24.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.25.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.25.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.25.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.26.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.26.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.26.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.27.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.27.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.27.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.28.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.28.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.28.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.29.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.29.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.29.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.3.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.3.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.3.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.30.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.30.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.30.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.31.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.31.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.31.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.32.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.32.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.32.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.33.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.33.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.33.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.34.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.34.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.34.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.35.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.35.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.35.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.36.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.36.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.36.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.37.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.37.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.37.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.38.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.38.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.38.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.39.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.39.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.39.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.4.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.4.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.4.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.40.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.40.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.40.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.41.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.41.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.41.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.42.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.42.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.42.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.43.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.43.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.43.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.44.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.44.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.44.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.45.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.45.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.45.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.46.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.46.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.46.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.47.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.47.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.47.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.48.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.48.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.48.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.49.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.49.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.49.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.5.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.5.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.5.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.50.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.50.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.50.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.51.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.51.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.51.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.52.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.52.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.52.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.53.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.53.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.53.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.54.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.54.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.54.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.55.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.55.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.55.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.56.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.56.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.56.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.57.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.57.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.57.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.58.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.58.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.58.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.59.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.59.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.59.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.6.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.6.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.6.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.60.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.60.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.60.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.61.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.61.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.61.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.62.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.62.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.62.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.63.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.63.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.63.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.64.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.64.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.64.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.65.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.65.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.65.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.66.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.66.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.66.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.67.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.67.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.67.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.68.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.68.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.68.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.69.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.69.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.69.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.7.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.7.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.7.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.70.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.70.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.70.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.71.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.71.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.71.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.72.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.72.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.72.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.73.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.73.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.73.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.74.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.74.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.74.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.75.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.75.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.75.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.76.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.76.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.76.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.77.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.77.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.77.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.78.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.78.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.78.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.79.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.79.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.79.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.experts.8.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.8.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.8.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.9.down_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.9.gate_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.experts.9.up_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.mlp.gate.e_score_correction_bias": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.gate.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.shared_experts.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.shared_experts.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.mlp.shared_experts.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00025-of-00075.safetensors", + "model.layers.31.self_attn.k_norm.weight": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.k_proj.bias": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.q_norm.weight": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.q_proj.bias": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.v_proj.bias": "model-00024-of-00075.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00024-of-00075.safetensors", + "model.layers.32.input_layernorm.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.0.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.0.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.0.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.1.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.1.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.1.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.10.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.10.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.10.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.11.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.11.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.11.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.12.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.12.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.12.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.13.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.13.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.13.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.14.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.14.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.14.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.15.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.15.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.15.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.16.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.16.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.16.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.17.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.17.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.17.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.18.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.18.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.18.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.19.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.19.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.19.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.2.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.2.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.2.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.20.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.20.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.20.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.21.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.21.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.21.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.22.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.22.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.22.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.23.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.23.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.23.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.24.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.24.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.24.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.25.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.25.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.25.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.26.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.26.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.26.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.27.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.27.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.27.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.28.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.28.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.28.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.29.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.29.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.29.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.3.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.3.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.3.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.30.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.30.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.30.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.31.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.31.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.31.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.32.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.32.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.32.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.33.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.33.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.33.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.34.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.34.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.34.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.35.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.35.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.35.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.36.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.36.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.36.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.37.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.37.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.37.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.38.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.38.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.38.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.39.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.39.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.39.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.4.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.4.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.4.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.40.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.40.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.40.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.41.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.41.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.41.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.42.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.42.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.42.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.43.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.43.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.43.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.44.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.44.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.44.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.45.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.45.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.45.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.46.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.46.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.46.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.47.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.47.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.47.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.48.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.48.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.48.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.49.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.49.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.49.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.5.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.5.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.5.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.50.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.50.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.50.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.51.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.51.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.51.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.52.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.52.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.52.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.53.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.53.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.53.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.54.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.54.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.54.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.55.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.55.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.55.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.56.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.56.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.56.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.57.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.57.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.57.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.58.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.58.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.58.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.59.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.59.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.59.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.6.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.6.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.6.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.60.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.60.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.60.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.61.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.61.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.61.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.62.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.62.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.62.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.63.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.63.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.63.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.64.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.64.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.64.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.65.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.65.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.65.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.66.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.66.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.66.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.67.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.67.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.67.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.68.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.68.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.68.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.69.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.69.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.69.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.7.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.7.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.7.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.70.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.70.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.70.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.71.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.71.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.71.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.72.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.72.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.72.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.73.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.73.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.73.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.74.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.74.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.74.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.75.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.75.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.75.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.76.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.76.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.76.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.77.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.77.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.77.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.78.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.78.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.78.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.79.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.79.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.79.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.experts.8.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.8.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.8.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.9.down_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.9.gate_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.experts.9.up_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.mlp.gate.e_score_correction_bias": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.gate.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.shared_experts.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.shared_experts.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.mlp.shared_experts.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00026-of-00075.safetensors", + "model.layers.32.self_attn.k_norm.weight": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.k_proj.bias": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.q_norm.weight": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.q_proj.bias": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.v_proj.bias": "model-00025-of-00075.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00025-of-00075.safetensors", + "model.layers.33.input_layernorm.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.0.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.0.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.0.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.1.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.1.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.1.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.10.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.10.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.10.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.11.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.11.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.11.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.12.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.12.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.12.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.13.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.13.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.13.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.14.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.14.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.14.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.15.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.15.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.15.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.16.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.16.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.16.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.17.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.17.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.17.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.18.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.18.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.18.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.19.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.19.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.19.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.2.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.2.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.2.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.20.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.20.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.20.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.21.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.21.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.21.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.22.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.22.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.22.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.23.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.23.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.23.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.24.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.24.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.24.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.25.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.25.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.25.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.26.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.26.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.26.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.27.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.27.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.27.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.28.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.28.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.28.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.29.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.29.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.29.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.3.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.3.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.3.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.30.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.30.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.30.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.31.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.31.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.31.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.32.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.32.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.32.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.33.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.33.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.33.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.34.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.34.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.34.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.35.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.35.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.35.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.36.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.36.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.36.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.37.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.37.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.37.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.38.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.38.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.38.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.39.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.39.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.39.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.4.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.4.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.4.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.40.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.40.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.40.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.41.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.41.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.41.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.42.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.42.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.42.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.43.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.43.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.43.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.44.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.44.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.44.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.45.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.45.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.45.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.46.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.46.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.46.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.47.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.47.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.47.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.48.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.48.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.48.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.49.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.49.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.49.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.5.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.5.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.5.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.50.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.50.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.50.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.51.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.51.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.51.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.52.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.52.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.52.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.53.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.53.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.53.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.54.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.54.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.54.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.55.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.55.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.55.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.56.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.56.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.56.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.57.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.57.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.57.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.58.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.58.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.58.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.59.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.59.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.59.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.6.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.6.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.6.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.60.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.60.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.60.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.61.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.61.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.61.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.62.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.62.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.62.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.63.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.63.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.63.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.64.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.64.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.64.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.65.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.65.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.65.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.66.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.66.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.66.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.67.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.67.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.67.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.68.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.68.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.68.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.69.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.69.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.69.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.7.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.7.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.7.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.70.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.70.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.70.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.71.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.71.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.71.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.72.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.72.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.72.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.73.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.73.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.73.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.74.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.74.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.74.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.75.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.75.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.75.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.76.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.76.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.76.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.77.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.77.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.77.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.78.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.78.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.78.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.79.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.79.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.79.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.experts.8.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.8.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.8.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.9.down_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.9.gate_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.experts.9.up_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.mlp.gate.e_score_correction_bias": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.gate.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.shared_experts.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.shared_experts.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.mlp.shared_experts.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00027-of-00075.safetensors", + "model.layers.33.self_attn.k_norm.weight": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.k_proj.bias": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.q_norm.weight": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.q_proj.bias": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.v_proj.bias": "model-00026-of-00075.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00026-of-00075.safetensors", + "model.layers.34.input_layernorm.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.0.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.0.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.0.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.1.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.1.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.1.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.10.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.10.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.10.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.11.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.11.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.11.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.12.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.12.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.12.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.13.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.13.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.13.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.14.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.14.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.14.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.15.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.15.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.15.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.16.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.16.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.16.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.17.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.17.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.17.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.18.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.18.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.18.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.19.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.19.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.19.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.2.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.2.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.2.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.20.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.20.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.20.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.21.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.21.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.21.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.22.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.22.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.22.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.23.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.23.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.23.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.24.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.24.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.24.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.25.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.25.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.25.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.26.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.26.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.26.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.27.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.27.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.27.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.28.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.28.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.28.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.29.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.29.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.29.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.3.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.3.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.3.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.30.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.30.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.30.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.31.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.31.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.31.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.32.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.32.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.32.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.33.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.33.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.33.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.34.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.34.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.34.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.35.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.35.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.35.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.36.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.36.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.36.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.37.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.37.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.37.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.38.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.38.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.38.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.39.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.39.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.39.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.4.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.4.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.4.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.40.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.40.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.40.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.41.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.41.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.41.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.42.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.42.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.42.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.43.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.43.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.43.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.44.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.44.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.44.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.45.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.45.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.45.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.46.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.46.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.46.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.47.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.47.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.47.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.48.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.48.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.48.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.49.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.49.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.49.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.5.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.5.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.5.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.50.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.50.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.50.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.51.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.51.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.51.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.52.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.52.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.52.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.53.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.53.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.53.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.54.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.54.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.54.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.55.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.55.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.55.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.56.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.56.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.56.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.57.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.57.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.57.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.58.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.58.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.58.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.59.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.59.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.59.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.6.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.6.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.6.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.60.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.60.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.60.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.61.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.61.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.61.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.62.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.62.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.62.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.63.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.63.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.63.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.64.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.64.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.64.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.65.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.65.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.65.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.66.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.66.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.66.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.67.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.67.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.67.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.68.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.68.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.68.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.69.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.69.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.69.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.7.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.7.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.7.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.70.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.70.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.70.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.71.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.71.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.71.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.72.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.72.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.72.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.73.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.73.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.73.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.74.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.74.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.74.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.75.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.75.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.75.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.76.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.76.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.76.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.77.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.77.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.77.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.78.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.78.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.78.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.79.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.79.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.79.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.8.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.8.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.8.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.9.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.9.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.experts.9.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.gate.e_score_correction_bias": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.gate.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.shared_experts.down_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.shared_experts.gate_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.mlp.shared_experts.up_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.k_norm.weight": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.k_proj.bias": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.q_norm.weight": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.q_proj.bias": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.v_proj.bias": "model-00027-of-00075.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.35.input_layernorm.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.0.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.0.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.0.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.1.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.1.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.1.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.10.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.10.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.10.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.11.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.11.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.11.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.12.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.12.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.12.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.13.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.13.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.13.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.14.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.14.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.14.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.15.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.15.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.15.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.16.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.16.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.16.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.17.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.17.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.17.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.18.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.18.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.18.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.19.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.19.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.19.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.2.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.2.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.2.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.20.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.20.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.20.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.21.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.21.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.21.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.22.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.22.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.22.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.23.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.23.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.23.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.24.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.24.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.24.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.25.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.25.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.25.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.26.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.26.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.26.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.27.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.27.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.27.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.28.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.28.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.28.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.29.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.29.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.29.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.3.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.3.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.3.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.30.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.30.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.30.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.31.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.31.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.31.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.32.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.32.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.32.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.33.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.33.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.33.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.34.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.34.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.34.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.35.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.35.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.35.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.36.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.36.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.36.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.37.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.37.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.37.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.38.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.38.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.38.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.39.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.39.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.39.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.4.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.4.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.4.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.40.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.40.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.40.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.41.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.41.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.41.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.42.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.42.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.42.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.43.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.43.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.43.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.44.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.44.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.44.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.45.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.45.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.45.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.46.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.46.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.46.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.47.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.47.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.47.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.48.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.48.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.48.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.49.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.49.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.49.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.5.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.5.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.5.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.50.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.50.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.50.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.51.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.51.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.51.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.52.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.52.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.52.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.53.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.53.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.53.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.54.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.54.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.54.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.55.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.55.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.55.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.56.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.56.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.56.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.57.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.57.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.57.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.58.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.58.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.58.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.59.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.59.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.59.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.6.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.6.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.6.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.60.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.60.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.60.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.61.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.61.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.61.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.62.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.62.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.62.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.63.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.63.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.63.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.64.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.64.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.64.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.65.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.65.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.65.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.66.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.66.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.66.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.67.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.67.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.67.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.68.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.68.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.68.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.69.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.69.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.69.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.7.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.7.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.7.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.70.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.70.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.70.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.71.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.71.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.71.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.72.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.72.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.72.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.73.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.73.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.73.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.74.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.74.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.74.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.75.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.75.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.75.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.76.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.76.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.76.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.77.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.77.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.77.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.78.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.78.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.78.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.79.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.79.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.79.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.8.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.8.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.8.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.9.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.9.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.experts.9.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.gate.e_score_correction_bias": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.gate.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.shared_experts.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.shared_experts.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.mlp.shared_experts.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00028-of-00075.safetensors", + "model.layers.35.self_attn.k_norm.weight": "model-00028-of-00075.safetensors", + "model.layers.35.self_attn.k_proj.bias": "model-00027-of-00075.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.35.self_attn.q_norm.weight": "model-00028-of-00075.safetensors", + "model.layers.35.self_attn.q_proj.bias": "model-00027-of-00075.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.35.self_attn.v_proj.bias": "model-00027-of-00075.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00027-of-00075.safetensors", + "model.layers.36.input_layernorm.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.0.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.0.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.0.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.1.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.1.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.1.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.10.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.10.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.10.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.11.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.11.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.11.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.12.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.12.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.12.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.13.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.13.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.13.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.14.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.14.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.14.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.15.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.15.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.15.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.16.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.16.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.16.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.17.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.17.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.17.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.18.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.18.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.18.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.19.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.19.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.19.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.2.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.2.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.2.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.20.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.20.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.20.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.21.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.21.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.21.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.22.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.22.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.22.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.23.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.23.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.23.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.24.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.24.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.24.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.25.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.25.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.25.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.26.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.26.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.26.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.27.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.27.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.27.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.28.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.28.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.28.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.29.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.29.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.29.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.3.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.3.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.3.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.30.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.30.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.30.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.31.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.31.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.31.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.32.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.32.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.32.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.33.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.33.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.33.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.34.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.34.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.34.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.35.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.35.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.35.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.36.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.36.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.36.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.37.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.37.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.37.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.38.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.38.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.38.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.39.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.39.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.39.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.4.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.4.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.4.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.40.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.40.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.40.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.41.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.41.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.41.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.42.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.42.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.42.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.43.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.43.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.43.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.44.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.44.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.44.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.45.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.45.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.45.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.46.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.46.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.46.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.47.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.47.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.47.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.48.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.48.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.48.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.49.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.49.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.49.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.5.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.5.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.5.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.50.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.50.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.50.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.51.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.51.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.51.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.52.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.52.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.52.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.53.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.53.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.53.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.54.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.54.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.54.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.55.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.55.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.55.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.56.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.56.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.56.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.57.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.57.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.57.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.58.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.58.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.58.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.59.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.59.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.59.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.6.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.6.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.6.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.60.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.60.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.60.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.61.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.61.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.61.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.62.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.62.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.62.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.63.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.63.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.63.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.64.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.64.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.64.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.65.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.65.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.65.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.66.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.66.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.66.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.67.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.67.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.67.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.68.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.68.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.68.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.69.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.69.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.69.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.7.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.7.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.7.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.70.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.70.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.70.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.71.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.71.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.71.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.72.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.72.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.72.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.73.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.73.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.73.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.74.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.74.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.74.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.75.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.75.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.75.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.76.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.76.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.76.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.77.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.77.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.77.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.78.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.78.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.78.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.79.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.79.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.79.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.experts.8.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.8.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.8.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.9.down_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.9.gate_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.experts.9.up_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.mlp.gate.e_score_correction_bias": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.gate.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.shared_experts.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.shared_experts.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.mlp.shared_experts.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00029-of-00075.safetensors", + "model.layers.36.self_attn.k_norm.weight": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.k_proj.bias": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.q_norm.weight": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.q_proj.bias": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.v_proj.bias": "model-00028-of-00075.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00028-of-00075.safetensors", + "model.layers.37.input_layernorm.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.0.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.0.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.0.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.1.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.1.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.1.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.10.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.10.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.10.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.11.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.11.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.11.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.12.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.12.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.12.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.13.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.13.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.13.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.14.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.14.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.14.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.15.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.15.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.15.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.16.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.16.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.16.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.17.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.17.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.17.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.18.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.18.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.18.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.19.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.19.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.19.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.2.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.2.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.2.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.20.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.20.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.20.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.21.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.21.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.21.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.22.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.22.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.22.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.23.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.23.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.23.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.24.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.24.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.24.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.25.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.25.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.25.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.26.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.26.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.26.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.27.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.27.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.27.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.28.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.28.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.28.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.29.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.29.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.29.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.3.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.3.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.3.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.30.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.30.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.30.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.31.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.31.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.31.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.32.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.32.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.32.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.33.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.33.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.33.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.34.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.34.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.34.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.35.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.35.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.35.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.36.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.36.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.36.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.37.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.37.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.37.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.38.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.38.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.38.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.39.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.39.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.39.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.4.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.4.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.4.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.40.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.40.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.40.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.41.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.41.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.41.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.42.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.42.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.42.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.43.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.43.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.43.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.44.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.44.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.44.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.45.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.45.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.45.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.46.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.46.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.46.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.47.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.47.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.47.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.48.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.48.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.48.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.49.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.49.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.49.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.5.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.5.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.5.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.50.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.50.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.50.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.51.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.51.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.51.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.52.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.52.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.52.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.53.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.53.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.53.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.54.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.54.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.54.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.55.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.55.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.55.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.56.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.56.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.56.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.57.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.57.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.57.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.58.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.58.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.58.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.59.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.59.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.59.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.6.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.6.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.6.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.60.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.60.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.60.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.61.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.61.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.61.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.62.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.62.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.62.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.63.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.63.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.63.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.64.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.64.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.64.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.65.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.65.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.65.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.66.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.66.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.66.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.67.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.67.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.67.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.68.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.68.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.68.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.69.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.69.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.69.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.7.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.7.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.7.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.70.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.70.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.70.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.71.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.71.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.71.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.72.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.72.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.72.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.73.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.73.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.73.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.74.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.74.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.74.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.75.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.75.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.75.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.76.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.76.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.76.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.77.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.77.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.77.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.78.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.78.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.78.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.79.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.79.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.79.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.experts.8.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.8.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.8.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.9.down_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.9.gate_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.experts.9.up_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.mlp.gate.e_score_correction_bias": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.gate.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.shared_experts.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.shared_experts.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.mlp.shared_experts.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00030-of-00075.safetensors", + "model.layers.37.self_attn.k_norm.weight": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.k_proj.bias": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.q_norm.weight": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.q_proj.bias": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.v_proj.bias": "model-00029-of-00075.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00029-of-00075.safetensors", + "model.layers.38.input_layernorm.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.0.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.0.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.0.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.1.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.1.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.1.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.10.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.10.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.10.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.11.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.11.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.11.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.12.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.12.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.12.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.13.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.13.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.13.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.14.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.14.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.14.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.15.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.15.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.15.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.16.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.16.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.16.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.17.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.17.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.17.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.18.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.18.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.18.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.19.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.19.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.19.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.2.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.2.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.2.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.20.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.20.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.20.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.21.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.21.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.21.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.22.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.22.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.22.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.23.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.23.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.23.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.24.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.24.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.24.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.25.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.25.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.25.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.26.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.26.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.26.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.27.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.27.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.27.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.28.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.28.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.28.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.29.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.29.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.29.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.3.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.3.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.3.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.30.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.30.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.30.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.31.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.31.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.31.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.32.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.32.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.32.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.33.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.33.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.33.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.34.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.34.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.34.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.35.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.35.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.35.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.36.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.36.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.36.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.37.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.37.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.37.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.38.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.38.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.38.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.39.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.39.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.39.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.4.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.4.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.4.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.40.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.40.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.40.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.41.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.41.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.41.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.42.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.42.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.42.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.43.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.43.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.43.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.44.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.44.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.44.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.45.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.45.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.45.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.46.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.46.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.46.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.47.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.47.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.47.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.48.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.48.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.48.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.49.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.49.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.49.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.5.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.5.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.5.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.50.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.50.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.50.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.51.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.51.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.51.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.52.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.52.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.52.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.53.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.53.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.53.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.54.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.54.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.54.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.55.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.55.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.55.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.56.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.56.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.56.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.57.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.57.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.57.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.58.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.58.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.58.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.59.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.59.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.59.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.6.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.6.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.6.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.60.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.60.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.60.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.61.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.61.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.61.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.62.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.62.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.62.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.63.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.63.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.63.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.64.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.64.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.64.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.65.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.65.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.65.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.66.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.66.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.66.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.67.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.67.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.67.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.68.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.68.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.68.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.69.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.69.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.69.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.7.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.7.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.7.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.70.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.70.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.70.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.71.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.71.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.71.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.72.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.72.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.72.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.73.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.73.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.73.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.74.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.74.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.74.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.75.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.75.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.75.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.76.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.76.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.76.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.77.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.77.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.77.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.78.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.78.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.78.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.79.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.79.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.79.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.experts.8.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.8.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.8.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.9.down_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.9.gate_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.experts.9.up_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.mlp.gate.e_score_correction_bias": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.gate.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.shared_experts.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.shared_experts.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.mlp.shared_experts.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00031-of-00075.safetensors", + "model.layers.38.self_attn.k_norm.weight": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.k_proj.bias": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.q_norm.weight": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.q_proj.bias": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.v_proj.bias": "model-00030-of-00075.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00030-of-00075.safetensors", + "model.layers.39.input_layernorm.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.0.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.0.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.0.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.1.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.1.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.1.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.10.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.10.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.10.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.11.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.11.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.11.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.12.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.12.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.12.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.13.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.13.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.13.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.14.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.14.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.14.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.15.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.15.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.15.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.16.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.16.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.16.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.17.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.17.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.17.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.18.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.18.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.18.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.19.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.19.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.19.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.2.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.2.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.2.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.20.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.20.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.20.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.21.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.21.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.21.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.22.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.22.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.22.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.23.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.23.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.23.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.24.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.24.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.24.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.25.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.25.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.25.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.26.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.26.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.26.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.27.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.27.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.27.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.28.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.28.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.28.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.29.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.29.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.29.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.3.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.3.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.3.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.30.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.30.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.30.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.31.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.31.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.31.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.32.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.32.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.32.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.33.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.33.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.33.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.34.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.34.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.34.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.35.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.35.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.35.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.36.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.36.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.36.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.37.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.37.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.37.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.38.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.38.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.38.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.39.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.39.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.39.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.4.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.4.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.4.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.40.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.40.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.40.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.41.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.41.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.41.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.42.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.42.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.42.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.43.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.43.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.43.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.44.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.44.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.44.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.45.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.45.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.45.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.46.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.46.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.46.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.47.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.47.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.47.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.48.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.48.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.48.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.49.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.49.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.49.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.5.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.5.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.5.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.50.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.50.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.50.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.51.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.51.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.51.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.52.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.52.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.52.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.53.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.53.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.53.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.54.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.54.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.54.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.55.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.55.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.55.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.56.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.56.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.56.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.57.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.57.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.57.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.58.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.58.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.58.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.59.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.59.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.59.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.6.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.6.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.6.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.60.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.60.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.60.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.61.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.61.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.61.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.62.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.62.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.62.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.63.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.63.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.63.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.64.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.64.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.64.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.65.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.65.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.65.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.66.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.66.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.66.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.67.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.67.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.67.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.68.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.68.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.68.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.69.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.69.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.69.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.7.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.7.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.7.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.70.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.70.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.70.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.71.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.71.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.71.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.72.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.72.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.72.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.73.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.73.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.73.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.74.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.74.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.74.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.75.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.75.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.75.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.76.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.76.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.76.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.77.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.77.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.77.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.78.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.78.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.78.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.79.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.79.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.79.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.experts.8.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.8.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.8.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.9.down_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.9.gate_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.experts.9.up_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.mlp.gate.e_score_correction_bias": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.gate.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.shared_experts.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.shared_experts.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.mlp.shared_experts.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00032-of-00075.safetensors", + "model.layers.39.self_attn.k_norm.weight": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.k_proj.bias": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.q_norm.weight": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.q_proj.bias": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.v_proj.bias": "model-00031-of-00075.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00031-of-00075.safetensors", + "model.layers.4.input_layernorm.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.0.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.0.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.0.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.1.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.1.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.1.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.10.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.10.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.10.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.11.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.11.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.11.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.12.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.12.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.12.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.13.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.13.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.13.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.14.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.14.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.14.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.15.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.15.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.15.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.16.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.16.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.16.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.17.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.17.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.17.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.18.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.18.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.18.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.19.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.19.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.19.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.2.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.2.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.2.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.20.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.20.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.20.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.21.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.21.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.21.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.22.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.22.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.22.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.23.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.23.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.23.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.24.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.24.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.24.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.25.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.25.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.25.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.26.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.26.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.26.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.27.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.27.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.27.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.28.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.28.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.28.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.29.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.29.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.29.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.3.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.3.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.3.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.30.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.30.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.30.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.31.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.31.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.31.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.32.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.32.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.32.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.33.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.33.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.33.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.34.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.34.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.34.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.35.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.35.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.35.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.36.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.36.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.36.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.37.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.37.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.37.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.38.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.38.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.38.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.39.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.39.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.39.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.4.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.4.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.4.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.40.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.40.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.40.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.41.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.41.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.41.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.42.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.42.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.42.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.43.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.43.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.43.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.44.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.44.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.44.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.45.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.45.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.45.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.46.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.46.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.46.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.47.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.47.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.47.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.48.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.48.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.48.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.49.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.49.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.49.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.5.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.5.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.5.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.50.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.50.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.50.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.51.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.51.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.51.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.52.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.52.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.52.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.53.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.53.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.53.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.54.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.54.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.54.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.55.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.55.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.55.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.56.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.56.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.56.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.57.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.57.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.57.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.58.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.58.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.58.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.59.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.59.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.59.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.6.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.6.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.6.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.60.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.60.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.60.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.61.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.61.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.61.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.62.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.62.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.62.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.63.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.63.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.63.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.64.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.64.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.64.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.65.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.65.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.65.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.66.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.66.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.66.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.67.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.67.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.67.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.68.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.68.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.68.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.69.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.69.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.69.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.7.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.7.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.7.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.70.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.70.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.70.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.71.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.71.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.71.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.72.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.72.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.72.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.73.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.73.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.73.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.74.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.74.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.74.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.75.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.75.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.75.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.76.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.76.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.76.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.77.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.77.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.77.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.78.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.78.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.78.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.79.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.79.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.79.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.experts.8.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.8.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.8.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.9.down_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.9.gate_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.experts.9.up_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.mlp.gate.e_score_correction_bias": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.gate.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.shared_experts.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.shared_experts.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.mlp.shared_experts.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00075.safetensors", + "model.layers.4.self_attn.k_norm.weight": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.k_proj.bias": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.q_norm.weight": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.q_proj.bias": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.v_proj.bias": "model-00002-of-00075.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00075.safetensors", + "model.layers.40.input_layernorm.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.0.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.0.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.0.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.1.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.1.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.1.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.10.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.10.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.10.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.11.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.11.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.11.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.12.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.12.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.12.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.13.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.13.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.13.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.14.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.14.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.14.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.15.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.15.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.15.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.16.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.16.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.16.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.17.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.17.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.17.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.18.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.18.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.18.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.19.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.19.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.19.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.2.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.2.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.2.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.20.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.20.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.20.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.21.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.21.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.21.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.22.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.22.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.22.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.23.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.23.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.23.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.24.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.24.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.24.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.25.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.25.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.25.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.26.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.26.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.26.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.27.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.27.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.27.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.28.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.28.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.28.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.29.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.29.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.29.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.3.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.3.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.3.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.30.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.30.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.30.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.31.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.31.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.31.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.32.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.32.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.32.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.33.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.33.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.33.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.34.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.34.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.34.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.35.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.35.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.35.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.36.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.36.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.36.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.37.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.37.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.37.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.38.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.38.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.38.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.39.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.39.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.39.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.4.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.4.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.4.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.40.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.40.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.40.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.41.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.41.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.41.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.42.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.42.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.42.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.43.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.43.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.43.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.44.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.44.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.44.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.45.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.45.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.45.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.46.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.46.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.46.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.47.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.47.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.47.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.48.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.48.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.48.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.49.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.49.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.49.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.5.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.5.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.5.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.50.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.50.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.50.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.51.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.51.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.51.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.52.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.52.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.52.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.53.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.53.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.53.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.54.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.54.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.54.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.55.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.55.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.55.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.56.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.56.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.56.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.57.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.57.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.57.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.58.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.58.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.58.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.59.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.59.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.59.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.6.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.6.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.6.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.60.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.60.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.60.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.61.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.61.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.61.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.62.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.62.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.62.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.63.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.63.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.63.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.64.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.64.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.64.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.65.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.65.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.65.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.66.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.66.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.66.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.67.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.67.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.67.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.68.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.68.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.68.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.69.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.69.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.69.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.7.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.7.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.7.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.70.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.70.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.70.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.71.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.71.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.71.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.72.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.72.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.72.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.73.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.73.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.73.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.74.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.74.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.74.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.75.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.75.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.75.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.76.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.76.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.76.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.77.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.77.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.77.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.78.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.78.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.78.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.79.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.79.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.79.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.8.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.8.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.8.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.9.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.9.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.experts.9.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.gate.e_score_correction_bias": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.gate.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.shared_experts.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.shared_experts.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.mlp.shared_experts.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.k_norm.weight": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.k_proj.bias": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.q_norm.weight": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.q_proj.bias": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.v_proj.bias": "model-00032-of-00075.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.input_layernorm.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.0.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.0.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.0.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.1.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.1.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.1.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.10.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.10.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.10.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.11.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.11.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.11.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.12.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.12.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.12.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.13.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.13.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.13.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.14.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.14.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.14.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.15.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.15.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.15.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.16.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.16.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.16.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.17.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.17.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.17.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.18.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.18.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.18.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.19.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.19.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.19.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.2.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.2.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.2.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.20.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.20.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.20.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.21.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.21.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.21.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.22.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.22.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.22.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.23.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.23.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.23.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.24.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.24.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.24.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.25.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.25.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.25.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.26.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.26.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.26.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.27.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.27.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.27.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.28.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.28.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.28.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.29.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.29.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.29.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.3.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.3.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.3.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.30.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.30.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.30.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.31.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.31.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.31.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.32.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.32.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.32.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.33.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.33.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.33.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.34.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.34.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.34.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.35.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.35.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.35.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.36.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.36.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.36.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.37.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.37.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.37.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.38.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.38.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.38.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.39.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.39.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.39.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.4.down_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.4.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.4.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.40.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.40.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.40.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.41.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.41.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.41.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.42.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.42.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.42.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.43.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.43.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.43.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.44.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.44.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.44.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.45.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.45.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.45.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.46.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.46.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.46.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.47.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.47.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.47.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.48.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.48.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.48.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.49.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.49.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.49.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.5.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.5.gate_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.5.up_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.mlp.experts.50.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.50.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.50.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.51.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.51.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.51.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.52.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.52.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.52.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.53.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.53.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.53.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.54.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.54.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.54.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.55.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.55.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.55.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.56.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.56.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.56.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.57.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.57.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.57.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.58.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.58.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.58.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.59.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.59.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.59.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.6.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.6.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.6.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.60.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.60.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.60.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.61.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.61.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.61.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.62.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.62.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.62.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.63.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.63.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.63.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.64.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.64.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.64.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.65.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.65.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.65.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.66.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.66.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.66.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.67.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.67.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.67.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.68.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.68.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.68.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.69.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.69.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.69.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.7.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.7.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.7.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.70.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.70.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.70.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.71.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.71.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.71.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.72.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.72.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.72.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.73.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.73.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.73.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.74.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.74.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.74.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.75.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.75.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.75.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.76.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.76.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.76.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.77.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.77.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.77.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.78.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.78.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.78.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.79.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.79.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.79.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.8.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.8.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.8.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.9.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.9.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.experts.9.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.gate.e_score_correction_bias": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.gate.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.shared_experts.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.shared_experts.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.mlp.shared_experts.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00033-of-00075.safetensors", + "model.layers.41.self_attn.k_norm.weight": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.k_proj.bias": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.q_norm.weight": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.q_proj.bias": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.v_proj.bias": "model-00032-of-00075.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00032-of-00075.safetensors", + "model.layers.42.input_layernorm.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.0.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.0.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.0.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.1.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.1.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.1.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.10.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.10.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.10.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.11.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.11.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.11.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.12.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.12.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.12.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.13.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.13.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.13.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.14.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.14.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.14.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.15.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.15.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.15.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.16.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.16.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.16.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.17.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.17.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.17.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.18.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.18.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.18.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.19.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.19.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.19.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.2.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.2.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.2.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.20.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.20.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.20.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.21.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.21.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.21.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.22.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.22.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.22.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.23.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.23.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.23.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.24.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.24.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.24.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.25.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.25.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.25.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.26.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.26.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.26.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.27.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.27.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.27.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.28.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.28.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.28.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.29.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.29.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.29.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.3.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.3.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.3.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.30.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.30.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.30.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.31.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.31.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.31.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.32.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.32.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.32.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.33.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.33.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.33.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.34.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.34.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.34.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.35.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.35.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.35.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.36.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.36.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.36.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.37.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.37.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.37.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.38.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.38.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.38.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.39.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.39.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.39.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.4.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.4.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.4.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.40.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.40.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.40.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.41.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.41.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.41.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.42.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.42.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.42.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.43.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.43.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.43.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.44.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.44.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.44.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.45.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.45.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.45.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.46.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.46.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.46.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.47.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.47.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.47.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.48.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.48.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.48.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.49.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.49.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.49.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.5.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.5.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.5.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.50.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.50.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.50.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.51.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.51.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.51.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.52.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.52.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.52.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.53.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.53.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.53.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.54.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.54.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.54.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.55.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.55.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.55.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.56.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.56.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.56.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.57.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.57.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.57.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.58.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.58.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.58.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.59.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.59.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.59.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.6.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.6.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.6.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.60.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.60.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.60.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.61.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.61.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.61.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.62.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.62.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.62.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.63.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.63.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.63.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.64.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.64.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.64.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.65.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.65.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.65.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.66.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.66.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.66.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.67.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.67.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.67.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.68.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.68.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.68.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.69.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.69.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.69.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.7.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.7.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.7.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.70.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.70.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.70.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.71.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.71.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.71.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.72.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.72.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.72.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.73.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.73.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.73.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.74.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.74.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.74.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.75.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.75.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.75.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.76.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.76.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.76.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.77.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.77.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.77.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.78.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.78.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.78.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.79.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.79.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.79.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.experts.8.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.8.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.8.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.9.down_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.9.gate_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.experts.9.up_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.mlp.gate.e_score_correction_bias": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.gate.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.shared_experts.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.shared_experts.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.mlp.shared_experts.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00034-of-00075.safetensors", + "model.layers.42.self_attn.k_norm.weight": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.k_proj.bias": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.q_norm.weight": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.q_proj.bias": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.v_proj.bias": "model-00033-of-00075.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00033-of-00075.safetensors", + "model.layers.43.input_layernorm.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.0.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.0.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.0.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.1.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.1.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.1.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.10.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.10.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.10.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.11.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.11.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.11.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.12.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.12.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.12.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.13.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.13.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.13.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.14.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.14.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.14.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.15.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.15.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.15.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.16.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.16.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.16.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.17.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.17.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.17.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.18.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.18.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.18.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.19.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.19.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.19.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.2.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.2.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.2.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.20.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.20.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.20.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.21.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.21.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.21.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.22.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.22.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.22.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.23.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.23.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.23.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.24.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.24.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.24.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.25.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.25.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.25.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.26.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.26.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.26.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.27.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.27.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.27.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.28.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.28.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.28.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.29.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.29.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.29.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.3.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.3.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.3.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.30.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.30.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.30.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.31.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.31.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.31.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.32.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.32.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.32.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.33.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.33.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.33.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.34.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.34.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.34.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.35.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.35.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.35.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.36.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.36.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.36.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.37.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.37.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.37.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.38.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.38.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.38.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.39.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.39.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.39.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.4.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.4.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.4.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.40.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.40.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.40.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.41.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.41.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.41.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.42.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.42.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.42.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.43.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.43.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.43.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.44.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.44.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.44.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.45.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.45.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.45.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.46.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.46.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.46.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.47.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.47.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.47.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.48.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.48.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.48.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.49.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.49.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.49.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.5.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.5.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.5.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.50.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.50.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.50.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.51.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.51.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.51.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.52.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.52.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.52.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.53.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.53.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.53.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.54.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.54.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.54.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.55.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.55.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.55.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.56.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.56.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.56.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.57.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.57.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.57.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.58.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.58.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.58.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.59.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.59.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.59.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.6.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.6.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.6.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.60.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.60.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.60.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.61.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.61.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.61.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.62.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.62.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.62.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.63.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.63.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.63.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.64.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.64.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.64.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.65.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.65.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.65.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.66.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.66.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.66.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.67.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.67.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.67.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.68.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.68.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.68.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.69.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.69.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.69.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.7.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.7.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.7.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.70.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.70.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.70.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.71.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.71.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.71.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.72.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.72.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.72.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.73.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.73.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.73.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.74.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.74.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.74.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.75.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.75.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.75.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.76.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.76.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.76.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.77.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.77.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.77.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.78.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.78.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.78.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.79.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.79.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.79.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.experts.8.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.8.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.8.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.9.down_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.9.gate_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.experts.9.up_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.mlp.gate.e_score_correction_bias": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.gate.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.shared_experts.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.shared_experts.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.mlp.shared_experts.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00035-of-00075.safetensors", + "model.layers.43.self_attn.k_norm.weight": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.k_proj.bias": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.q_norm.weight": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.q_proj.bias": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.v_proj.bias": "model-00034-of-00075.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00034-of-00075.safetensors", + "model.layers.44.input_layernorm.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.0.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.0.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.0.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.1.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.1.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.1.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.10.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.10.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.10.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.11.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.11.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.11.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.12.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.12.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.12.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.13.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.13.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.13.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.14.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.14.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.14.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.15.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.15.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.15.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.16.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.16.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.16.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.17.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.17.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.17.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.18.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.18.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.18.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.19.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.19.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.19.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.2.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.2.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.2.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.20.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.20.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.20.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.21.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.21.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.21.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.22.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.22.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.22.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.23.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.23.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.23.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.24.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.24.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.24.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.25.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.25.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.25.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.26.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.26.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.26.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.27.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.27.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.27.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.28.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.28.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.28.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.29.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.29.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.29.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.3.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.3.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.3.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.30.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.30.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.30.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.31.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.31.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.31.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.32.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.32.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.32.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.33.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.33.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.33.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.34.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.34.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.34.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.35.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.35.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.35.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.36.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.36.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.36.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.37.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.37.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.37.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.38.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.38.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.38.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.39.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.39.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.39.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.4.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.4.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.4.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.40.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.40.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.40.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.41.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.41.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.41.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.42.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.42.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.42.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.43.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.43.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.43.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.44.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.44.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.44.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.45.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.45.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.45.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.46.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.46.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.46.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.47.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.47.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.47.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.48.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.48.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.48.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.49.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.49.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.49.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.5.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.5.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.5.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.50.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.50.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.50.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.51.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.51.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.51.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.52.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.52.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.52.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.53.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.53.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.53.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.54.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.54.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.54.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.55.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.55.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.55.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.56.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.56.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.56.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.57.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.57.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.57.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.58.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.58.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.58.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.59.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.59.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.59.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.6.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.6.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.6.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.60.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.60.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.60.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.61.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.61.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.61.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.62.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.62.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.62.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.63.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.63.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.63.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.64.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.64.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.64.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.65.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.65.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.65.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.66.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.66.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.66.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.67.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.67.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.67.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.68.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.68.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.68.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.69.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.69.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.69.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.7.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.7.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.7.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.70.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.70.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.70.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.71.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.71.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.71.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.72.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.72.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.72.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.73.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.73.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.73.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.74.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.74.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.74.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.75.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.75.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.75.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.76.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.76.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.76.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.77.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.77.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.77.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.78.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.78.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.78.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.79.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.79.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.79.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.experts.8.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.8.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.8.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.9.down_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.9.gate_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.experts.9.up_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.mlp.gate.e_score_correction_bias": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.gate.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.shared_experts.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.shared_experts.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.mlp.shared_experts.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00036-of-00075.safetensors", + "model.layers.44.self_attn.k_norm.weight": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.k_proj.bias": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.q_norm.weight": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.q_proj.bias": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.v_proj.bias": "model-00035-of-00075.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00035-of-00075.safetensors", + "model.layers.45.input_layernorm.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.0.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.0.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.0.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.1.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.1.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.1.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.10.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.10.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.10.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.11.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.11.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.11.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.12.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.12.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.12.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.13.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.13.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.13.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.14.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.14.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.14.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.15.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.15.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.15.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.16.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.16.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.16.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.17.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.17.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.17.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.18.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.18.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.18.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.19.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.19.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.19.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.2.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.2.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.2.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.20.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.20.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.20.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.21.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.21.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.21.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.22.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.22.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.22.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.23.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.23.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.23.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.24.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.24.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.24.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.25.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.25.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.25.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.26.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.26.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.26.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.27.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.27.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.27.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.28.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.28.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.28.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.29.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.29.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.29.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.3.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.3.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.3.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.30.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.30.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.30.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.31.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.31.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.31.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.32.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.32.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.32.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.33.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.33.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.33.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.34.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.34.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.34.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.35.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.35.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.35.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.36.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.36.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.36.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.37.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.37.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.37.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.38.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.38.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.38.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.39.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.39.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.39.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.4.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.4.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.4.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.40.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.40.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.40.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.41.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.41.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.41.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.42.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.42.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.42.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.43.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.43.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.43.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.44.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.44.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.44.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.45.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.45.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.45.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.46.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.46.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.46.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.47.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.47.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.47.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.48.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.48.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.48.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.49.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.49.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.49.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.5.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.5.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.5.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.50.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.50.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.50.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.51.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.51.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.51.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.52.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.52.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.52.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.53.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.53.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.53.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.54.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.54.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.54.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.55.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.55.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.55.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.56.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.56.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.56.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.57.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.57.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.57.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.58.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.58.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.58.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.59.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.59.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.59.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.6.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.6.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.6.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.60.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.60.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.60.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.61.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.61.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.61.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.62.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.62.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.62.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.63.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.63.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.63.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.64.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.64.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.64.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.65.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.65.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.65.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.66.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.66.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.66.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.67.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.67.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.67.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.68.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.68.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.68.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.69.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.69.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.69.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.7.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.7.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.7.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.70.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.70.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.70.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.71.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.71.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.71.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.72.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.72.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.72.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.73.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.73.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.73.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.74.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.74.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.74.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.75.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.75.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.75.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.76.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.76.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.76.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.77.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.77.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.77.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.78.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.78.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.78.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.79.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.79.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.79.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.8.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.8.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.8.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.9.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.9.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.experts.9.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.gate.e_score_correction_bias": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.gate.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.shared_experts.down_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.shared_experts.gate_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.mlp.shared_experts.up_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.k_norm.weight": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.k_proj.bias": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.q_norm.weight": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.q_proj.bias": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.v_proj.bias": "model-00036-of-00075.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00036-of-00075.safetensors", + "model.layers.46.input_layernorm.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.0.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.0.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.0.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.1.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.1.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.1.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.10.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.10.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.10.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.11.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.11.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.11.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.12.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.12.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.12.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.13.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.13.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.13.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.14.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.14.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.14.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.15.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.15.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.15.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.16.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.16.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.16.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.17.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.17.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.17.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.18.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.18.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.18.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.19.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.19.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.19.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.2.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.2.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.2.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.20.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.20.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.20.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.21.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.21.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.21.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.22.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.22.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.22.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.23.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.23.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.23.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.24.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.24.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.24.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.25.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.25.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.25.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.26.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.26.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.26.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.27.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.27.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.27.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.28.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.28.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.28.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.29.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.29.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.29.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.3.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.3.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.3.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.30.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.30.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.30.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.31.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.31.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.31.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.32.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.32.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.32.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.33.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.33.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.33.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.34.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.34.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.34.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.35.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.35.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.35.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.36.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.36.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.36.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.37.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.37.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.37.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.38.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.38.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.38.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.39.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.39.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.39.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.4.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.4.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.4.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.40.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.40.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.40.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.41.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.41.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.41.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.42.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.42.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.42.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.43.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.43.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.43.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.44.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.44.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.44.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.45.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.45.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.45.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.46.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.46.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.46.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.47.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.47.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.47.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.48.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.48.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.48.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.49.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.49.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.49.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.5.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.5.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.5.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.50.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.50.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.50.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.51.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.51.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.51.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.52.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.52.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.52.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.53.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.53.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.53.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.54.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.54.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.54.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.55.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.55.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.55.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.56.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.56.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.56.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.57.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.57.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.57.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.58.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.58.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.58.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.59.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.59.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.59.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.6.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.6.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.6.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.60.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.60.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.60.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.61.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.61.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.61.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.62.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.62.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.62.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.63.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.63.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.63.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.64.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.64.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.64.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.65.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.65.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.65.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.66.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.66.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.66.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.67.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.67.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.67.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.68.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.68.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.68.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.69.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.69.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.69.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.7.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.7.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.7.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.70.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.70.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.70.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.71.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.71.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.71.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.72.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.72.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.72.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.73.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.73.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.73.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.74.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.74.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.74.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.75.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.75.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.75.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.76.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.76.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.76.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.77.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.77.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.77.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.78.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.78.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.78.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.79.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.79.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.79.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.8.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.8.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.8.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.9.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.9.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.experts.9.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.gate.e_score_correction_bias": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.gate.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.shared_experts.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.shared_experts.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.mlp.shared_experts.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.k_norm.weight": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.k_proj.bias": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.q_norm.weight": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.q_proj.bias": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.v_proj.bias": "model-00037-of-00075.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.input_layernorm.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.0.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.0.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.0.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.1.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.1.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.1.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.10.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.10.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.10.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.11.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.11.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.11.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.12.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.12.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.12.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.13.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.13.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.13.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.14.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.14.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.14.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.15.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.15.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.15.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.16.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.16.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.16.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.17.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.17.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.17.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.18.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.18.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.18.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.19.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.19.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.19.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.2.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.2.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.2.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.20.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.20.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.20.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.21.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.21.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.21.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.22.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.22.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.22.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.23.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.23.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.23.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.24.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.24.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.24.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.25.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.25.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.25.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.26.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.26.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.26.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.27.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.27.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.27.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.28.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.28.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.28.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.29.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.29.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.29.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.3.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.3.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.3.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.30.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.30.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.30.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.31.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.31.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.31.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.32.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.32.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.32.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.33.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.33.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.33.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.34.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.34.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.34.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.35.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.35.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.35.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.36.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.36.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.36.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.37.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.37.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.37.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.38.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.38.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.38.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.39.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.39.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.39.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.4.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.4.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.4.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.40.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.40.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.40.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.41.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.41.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.41.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.42.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.42.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.42.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.43.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.43.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.43.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.44.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.44.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.44.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.45.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.45.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.45.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.46.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.46.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.46.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.47.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.47.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.47.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.48.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.48.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.48.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.49.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.49.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.49.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.5.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.5.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.5.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.50.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.50.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.50.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.51.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.51.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.51.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.52.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.52.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.52.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.53.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.53.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.53.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.54.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.54.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.54.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.55.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.55.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.55.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.56.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.56.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.56.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.57.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.57.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.57.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.58.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.58.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.58.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.59.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.59.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.59.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.6.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.6.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.6.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.60.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.60.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.60.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.61.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.61.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.61.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.62.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.62.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.62.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.63.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.63.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.63.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.64.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.64.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.64.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.65.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.65.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.65.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.66.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.66.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.66.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.67.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.67.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.67.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.68.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.68.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.68.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.69.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.69.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.69.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.7.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.7.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.7.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.70.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.70.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.70.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.71.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.71.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.71.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.72.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.72.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.72.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.73.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.73.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.73.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.74.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.74.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.74.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.75.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.75.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.75.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.76.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.76.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.76.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.77.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.77.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.77.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.78.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.78.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.78.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.79.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.79.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.79.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.experts.8.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.8.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.8.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.9.down_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.9.gate_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.experts.9.up_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.mlp.gate.e_score_correction_bias": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.gate.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.shared_experts.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.shared_experts.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.mlp.shared_experts.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00038-of-00075.safetensors", + "model.layers.47.self_attn.k_norm.weight": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.k_proj.bias": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.q_norm.weight": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.q_proj.bias": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.v_proj.bias": "model-00037-of-00075.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00037-of-00075.safetensors", + "model.layers.48.input_layernorm.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.0.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.0.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.0.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.1.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.1.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.1.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.10.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.10.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.10.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.11.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.11.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.11.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.12.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.12.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.12.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.13.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.13.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.13.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.14.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.14.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.14.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.15.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.15.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.15.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.16.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.16.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.16.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.17.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.17.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.17.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.18.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.18.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.18.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.19.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.19.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.19.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.2.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.2.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.2.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.20.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.20.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.20.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.21.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.21.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.21.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.22.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.22.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.22.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.23.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.23.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.23.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.24.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.24.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.24.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.25.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.25.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.25.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.26.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.26.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.26.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.27.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.27.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.27.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.28.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.28.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.28.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.29.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.29.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.29.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.3.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.3.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.3.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.30.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.30.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.30.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.31.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.31.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.31.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.32.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.32.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.32.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.33.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.33.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.33.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.34.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.34.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.34.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.35.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.35.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.35.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.36.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.36.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.36.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.37.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.37.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.37.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.38.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.38.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.38.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.39.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.39.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.39.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.4.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.4.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.4.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.40.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.40.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.40.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.41.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.41.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.41.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.42.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.42.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.42.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.43.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.43.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.43.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.44.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.44.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.44.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.45.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.45.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.45.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.46.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.46.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.46.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.47.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.47.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.47.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.48.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.48.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.48.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.49.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.49.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.49.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.5.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.5.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.5.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.50.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.50.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.50.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.51.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.51.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.51.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.52.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.52.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.52.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.53.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.53.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.53.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.54.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.54.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.54.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.55.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.55.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.55.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.56.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.56.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.56.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.57.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.57.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.57.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.58.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.58.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.58.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.59.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.59.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.59.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.6.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.6.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.6.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.60.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.60.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.60.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.61.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.61.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.61.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.62.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.62.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.62.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.63.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.63.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.63.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.64.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.64.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.64.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.65.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.65.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.65.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.66.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.66.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.66.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.67.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.67.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.67.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.68.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.68.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.68.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.69.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.69.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.69.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.7.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.7.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.7.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.70.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.70.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.70.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.71.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.71.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.71.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.72.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.72.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.72.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.73.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.73.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.73.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.74.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.74.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.74.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.75.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.75.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.75.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.76.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.76.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.76.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.77.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.77.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.77.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.78.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.78.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.78.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.79.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.79.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.79.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.experts.8.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.8.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.8.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.9.down_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.9.gate_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.experts.9.up_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.mlp.gate.e_score_correction_bias": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.gate.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.shared_experts.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.shared_experts.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.mlp.shared_experts.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00039-of-00075.safetensors", + "model.layers.48.self_attn.k_norm.weight": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.k_proj.bias": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.q_norm.weight": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.q_proj.bias": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.v_proj.bias": "model-00038-of-00075.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00038-of-00075.safetensors", + "model.layers.49.input_layernorm.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.0.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.0.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.0.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.1.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.1.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.1.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.10.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.10.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.10.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.11.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.11.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.11.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.12.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.12.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.12.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.13.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.13.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.13.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.14.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.14.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.14.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.15.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.15.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.15.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.16.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.16.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.16.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.17.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.17.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.17.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.18.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.18.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.18.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.19.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.19.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.19.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.2.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.2.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.2.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.20.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.20.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.20.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.21.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.21.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.21.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.22.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.22.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.22.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.23.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.23.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.23.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.24.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.24.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.24.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.25.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.25.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.25.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.26.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.26.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.26.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.27.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.27.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.27.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.28.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.28.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.28.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.29.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.29.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.29.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.3.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.3.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.3.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.30.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.30.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.30.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.31.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.31.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.31.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.32.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.32.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.32.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.33.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.33.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.33.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.34.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.34.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.34.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.35.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.35.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.35.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.36.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.36.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.36.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.37.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.37.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.37.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.38.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.38.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.38.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.39.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.39.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.39.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.4.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.4.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.4.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.40.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.40.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.40.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.41.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.41.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.41.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.42.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.42.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.42.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.43.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.43.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.43.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.44.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.44.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.44.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.45.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.45.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.45.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.46.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.46.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.46.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.47.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.47.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.47.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.48.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.48.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.48.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.49.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.49.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.49.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.5.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.5.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.5.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.50.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.50.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.50.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.51.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.51.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.51.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.52.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.52.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.52.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.53.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.53.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.53.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.54.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.54.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.54.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.55.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.55.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.55.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.56.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.56.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.56.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.57.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.57.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.57.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.58.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.58.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.58.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.59.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.59.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.59.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.6.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.6.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.6.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.60.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.60.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.60.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.61.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.61.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.61.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.62.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.62.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.62.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.63.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.63.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.63.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.64.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.64.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.64.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.65.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.65.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.65.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.66.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.66.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.66.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.67.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.67.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.67.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.68.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.68.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.68.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.69.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.69.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.69.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.7.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.7.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.7.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.70.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.70.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.70.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.71.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.71.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.71.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.72.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.72.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.72.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.73.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.73.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.73.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.74.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.74.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.74.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.75.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.75.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.75.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.76.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.76.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.76.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.77.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.77.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.77.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.78.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.78.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.78.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.79.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.79.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.79.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.experts.8.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.8.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.8.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.9.down_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.9.gate_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.experts.9.up_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.mlp.gate.e_score_correction_bias": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.gate.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.shared_experts.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.shared_experts.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.mlp.shared_experts.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00040-of-00075.safetensors", + "model.layers.49.self_attn.k_norm.weight": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.k_proj.bias": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.q_norm.weight": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.q_proj.bias": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.v_proj.bias": "model-00039-of-00075.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00039-of-00075.safetensors", + "model.layers.5.input_layernorm.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.0.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.0.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.0.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.1.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.1.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.1.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.10.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.10.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.10.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.11.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.11.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.11.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.12.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.12.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.12.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.13.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.13.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.13.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.14.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.14.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.14.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.15.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.15.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.15.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.16.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.16.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.16.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.17.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.17.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.17.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.18.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.18.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.18.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.19.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.19.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.19.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.2.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.2.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.2.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.20.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.20.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.20.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.21.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.21.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.21.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.22.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.22.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.22.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.23.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.23.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.23.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.24.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.24.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.24.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.25.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.25.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.25.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.26.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.26.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.26.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.27.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.27.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.27.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.28.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.28.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.28.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.29.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.29.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.29.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.3.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.3.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.3.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.30.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.30.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.30.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.31.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.31.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.31.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.32.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.32.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.32.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.33.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.33.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.33.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.34.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.34.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.34.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.35.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.35.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.35.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.36.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.36.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.36.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.37.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.37.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.37.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.38.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.38.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.38.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.39.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.39.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.39.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.4.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.4.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.4.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.40.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.40.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.40.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.41.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.41.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.41.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.42.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.42.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.42.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.43.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.43.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.43.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.44.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.44.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.44.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.45.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.45.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.45.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.46.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.46.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.46.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.47.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.47.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.47.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.48.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.48.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.48.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.49.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.49.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.49.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.5.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.5.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.5.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.50.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.50.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.50.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.51.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.51.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.51.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.52.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.52.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.52.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.53.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.53.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.53.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.54.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.54.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.54.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.55.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.55.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.55.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.56.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.56.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.56.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.57.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.57.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.57.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.58.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.58.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.58.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.59.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.59.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.59.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.6.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.6.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.6.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.60.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.60.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.60.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.61.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.61.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.61.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.62.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.62.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.62.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.63.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.63.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.63.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.64.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.64.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.64.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.65.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.65.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.65.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.66.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.66.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.66.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.67.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.67.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.67.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.68.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.68.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.68.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.69.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.69.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.69.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.7.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.7.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.7.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.70.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.70.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.70.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.71.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.71.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.71.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.72.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.72.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.72.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.73.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.73.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.73.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.74.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.74.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.74.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.75.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.75.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.75.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.76.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.76.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.76.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.77.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.77.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.77.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.78.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.78.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.78.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.79.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.79.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.79.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.experts.8.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.8.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.8.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.9.down_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.9.gate_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.experts.9.up_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.mlp.gate.e_score_correction_bias": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.gate.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.shared_experts.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.shared_experts.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.mlp.shared_experts.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00004-of-00075.safetensors", + "model.layers.5.self_attn.k_norm.weight": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.k_proj.bias": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.q_norm.weight": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.q_proj.bias": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.v_proj.bias": "model-00003-of-00075.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00003-of-00075.safetensors", + "model.layers.50.input_layernorm.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.0.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.0.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.0.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.1.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.1.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.1.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.10.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.10.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.10.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.11.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.11.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.11.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.12.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.12.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.12.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.13.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.13.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.13.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.14.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.14.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.14.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.15.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.15.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.15.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.16.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.16.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.16.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.17.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.17.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.17.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.18.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.18.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.18.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.19.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.19.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.19.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.2.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.2.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.2.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.20.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.20.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.20.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.21.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.21.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.21.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.22.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.22.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.22.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.23.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.23.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.23.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.24.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.24.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.24.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.25.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.25.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.25.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.26.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.26.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.26.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.27.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.27.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.27.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.28.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.28.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.28.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.29.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.29.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.29.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.3.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.3.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.3.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.30.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.30.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.30.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.31.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.31.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.31.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.32.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.32.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.32.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.33.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.33.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.33.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.34.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.34.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.34.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.35.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.35.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.35.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.36.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.36.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.36.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.37.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.37.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.37.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.38.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.38.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.38.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.39.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.39.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.39.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.4.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.4.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.4.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.40.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.40.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.40.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.41.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.41.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.41.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.42.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.42.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.42.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.43.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.43.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.43.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.44.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.44.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.44.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.45.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.45.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.45.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.46.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.46.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.46.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.47.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.47.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.47.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.48.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.48.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.48.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.49.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.49.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.49.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.5.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.5.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.5.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.50.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.50.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.50.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.51.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.51.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.51.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.52.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.52.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.52.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.53.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.53.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.53.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.54.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.54.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.54.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.55.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.55.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.55.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.56.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.56.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.56.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.57.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.57.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.57.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.58.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.58.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.58.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.59.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.59.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.59.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.6.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.6.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.6.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.60.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.60.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.60.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.61.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.61.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.61.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.62.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.62.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.62.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.63.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.63.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.63.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.64.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.64.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.64.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.65.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.65.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.65.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.66.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.66.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.66.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.67.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.67.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.67.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.68.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.68.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.68.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.69.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.69.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.69.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.7.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.7.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.7.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.70.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.70.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.70.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.71.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.71.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.71.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.72.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.72.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.72.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.73.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.73.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.73.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.74.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.74.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.74.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.75.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.75.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.75.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.76.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.76.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.76.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.77.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.77.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.77.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.78.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.78.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.78.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.79.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.79.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.79.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.experts.8.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.8.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.8.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.9.down_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.9.gate_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.experts.9.up_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.mlp.gate.e_score_correction_bias": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.gate.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.shared_experts.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.shared_experts.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.mlp.shared_experts.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00041-of-00075.safetensors", + "model.layers.50.self_attn.k_norm.weight": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.k_proj.bias": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.q_norm.weight": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.q_proj.bias": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.v_proj.bias": "model-00040-of-00075.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00040-of-00075.safetensors", + "model.layers.51.input_layernorm.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.0.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.0.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.0.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.1.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.1.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.1.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.10.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.10.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.10.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.11.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.11.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.11.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.12.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.12.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.12.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.13.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.13.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.13.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.14.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.14.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.14.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.15.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.15.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.15.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.16.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.16.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.16.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.17.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.17.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.17.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.18.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.18.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.18.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.19.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.19.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.19.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.2.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.2.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.2.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.20.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.20.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.20.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.21.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.21.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.21.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.22.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.22.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.22.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.23.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.23.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.23.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.24.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.24.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.24.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.25.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.25.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.25.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.26.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.26.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.26.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.27.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.27.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.27.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.28.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.28.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.28.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.29.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.29.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.29.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.3.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.3.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.3.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.30.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.30.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.30.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.31.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.31.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.31.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.32.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.32.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.32.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.33.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.33.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.33.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.34.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.34.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.34.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.35.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.35.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.35.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.36.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.36.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.36.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.37.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.37.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.37.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.38.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.38.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.38.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.39.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.39.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.39.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.4.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.4.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.4.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.40.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.40.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.40.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.41.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.41.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.41.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.42.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.42.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.42.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.43.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.43.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.43.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.44.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.44.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.44.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.45.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.45.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.45.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.46.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.46.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.46.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.47.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.47.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.47.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.48.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.48.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.48.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.49.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.49.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.49.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.5.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.5.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.5.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.50.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.50.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.50.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.51.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.51.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.51.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.52.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.52.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.52.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.53.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.53.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.53.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.54.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.54.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.54.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.55.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.55.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.55.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.56.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.56.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.56.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.57.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.57.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.57.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.58.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.58.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.58.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.59.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.59.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.59.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.6.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.6.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.6.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.60.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.60.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.60.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.61.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.61.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.61.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.62.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.62.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.62.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.63.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.63.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.63.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.64.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.64.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.64.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.65.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.65.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.65.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.66.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.66.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.66.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.67.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.67.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.67.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.68.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.68.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.68.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.69.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.69.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.69.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.7.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.7.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.7.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.70.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.70.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.70.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.71.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.71.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.71.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.72.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.72.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.72.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.73.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.73.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.73.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.74.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.74.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.74.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.75.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.75.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.75.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.76.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.76.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.76.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.77.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.77.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.77.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.78.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.78.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.78.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.79.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.79.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.79.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.8.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.8.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.8.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.9.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.9.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.experts.9.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.gate.e_score_correction_bias": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.gate.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.shared_experts.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.shared_experts.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.mlp.shared_experts.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.k_norm.weight": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.k_proj.bias": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.q_norm.weight": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.q_proj.bias": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.v_proj.bias": "model-00041-of-00075.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.input_layernorm.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.0.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.0.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.0.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.1.down_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.1.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.1.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.10.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.10.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.10.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.11.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.11.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.11.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.12.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.12.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.12.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.13.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.13.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.13.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.14.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.14.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.14.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.15.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.15.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.15.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.16.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.16.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.16.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.17.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.17.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.17.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.18.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.18.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.18.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.19.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.19.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.19.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.2.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.2.gate_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.2.up_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.mlp.experts.20.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.20.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.20.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.21.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.21.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.21.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.22.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.22.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.22.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.23.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.23.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.23.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.24.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.24.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.24.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.25.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.25.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.25.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.26.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.26.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.26.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.27.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.27.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.27.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.28.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.28.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.28.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.29.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.29.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.29.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.3.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.3.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.3.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.30.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.30.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.30.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.31.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.31.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.31.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.32.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.32.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.32.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.33.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.33.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.33.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.34.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.34.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.34.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.35.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.35.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.35.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.36.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.36.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.36.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.37.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.37.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.37.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.38.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.38.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.38.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.39.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.39.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.39.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.4.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.4.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.4.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.40.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.40.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.40.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.41.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.41.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.41.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.42.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.42.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.42.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.43.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.43.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.43.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.44.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.44.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.44.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.45.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.45.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.45.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.46.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.46.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.46.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.47.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.47.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.47.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.48.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.48.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.48.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.49.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.49.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.49.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.5.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.5.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.5.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.50.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.50.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.50.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.51.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.51.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.51.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.52.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.52.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.52.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.53.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.53.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.53.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.54.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.54.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.54.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.55.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.55.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.55.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.56.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.56.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.56.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.57.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.57.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.57.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.58.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.58.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.58.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.59.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.59.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.59.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.6.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.6.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.6.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.60.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.60.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.60.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.61.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.61.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.61.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.62.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.62.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.62.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.63.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.63.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.63.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.64.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.64.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.64.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.65.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.65.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.65.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.66.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.66.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.66.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.67.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.67.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.67.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.68.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.68.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.68.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.69.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.69.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.69.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.7.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.7.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.7.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.70.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.70.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.70.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.71.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.71.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.71.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.72.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.72.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.72.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.73.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.73.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.73.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.74.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.74.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.74.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.75.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.75.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.75.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.76.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.76.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.76.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.77.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.77.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.77.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.78.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.78.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.78.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.79.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.79.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.79.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.8.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.8.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.8.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.9.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.9.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.experts.9.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.gate.e_score_correction_bias": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.gate.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.shared_experts.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.shared_experts.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.mlp.shared_experts.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00042-of-00075.safetensors", + "model.layers.52.self_attn.k_norm.weight": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.k_proj.bias": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.q_norm.weight": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.q_proj.bias": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.v_proj.bias": "model-00041-of-00075.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00041-of-00075.safetensors", + "model.layers.53.input_layernorm.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.0.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.0.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.0.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.1.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.1.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.1.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.10.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.10.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.10.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.11.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.11.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.11.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.12.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.12.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.12.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.13.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.13.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.13.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.14.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.14.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.14.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.15.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.15.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.15.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.16.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.16.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.16.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.17.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.17.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.17.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.18.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.18.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.18.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.19.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.19.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.19.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.2.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.2.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.2.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.20.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.20.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.20.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.21.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.21.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.21.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.22.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.22.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.22.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.23.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.23.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.23.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.24.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.24.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.24.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.25.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.25.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.25.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.26.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.26.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.26.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.27.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.27.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.27.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.28.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.28.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.28.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.29.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.29.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.29.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.3.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.3.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.3.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.30.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.30.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.30.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.31.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.31.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.31.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.32.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.32.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.32.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.33.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.33.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.33.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.34.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.34.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.34.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.35.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.35.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.35.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.36.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.36.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.36.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.37.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.37.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.37.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.38.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.38.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.38.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.39.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.39.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.39.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.4.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.4.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.4.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.40.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.40.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.40.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.41.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.41.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.41.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.42.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.42.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.42.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.43.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.43.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.43.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.44.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.44.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.44.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.45.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.45.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.45.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.46.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.46.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.46.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.47.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.47.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.47.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.48.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.48.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.48.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.49.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.49.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.49.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.5.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.5.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.5.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.50.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.50.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.50.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.51.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.51.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.51.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.52.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.52.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.52.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.53.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.53.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.53.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.54.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.54.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.54.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.55.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.55.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.55.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.56.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.56.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.56.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.57.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.57.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.57.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.58.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.58.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.58.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.59.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.59.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.59.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.6.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.6.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.6.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.60.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.60.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.60.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.61.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.61.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.61.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.62.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.62.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.62.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.63.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.63.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.63.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.64.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.64.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.64.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.65.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.65.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.65.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.66.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.66.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.66.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.67.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.67.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.67.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.68.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.68.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.68.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.69.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.69.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.69.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.7.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.7.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.7.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.70.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.70.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.70.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.71.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.71.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.71.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.72.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.72.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.72.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.73.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.73.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.73.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.74.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.74.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.74.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.75.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.75.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.75.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.76.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.76.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.76.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.77.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.77.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.77.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.78.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.78.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.78.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.79.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.79.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.79.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.experts.8.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.8.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.8.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.9.down_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.9.gate_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.experts.9.up_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.mlp.gate.e_score_correction_bias": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.gate.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.shared_experts.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.shared_experts.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.mlp.shared_experts.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00043-of-00075.safetensors", + "model.layers.53.self_attn.k_norm.weight": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.k_proj.bias": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.q_norm.weight": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.q_proj.bias": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.v_proj.bias": "model-00042-of-00075.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00042-of-00075.safetensors", + "model.layers.54.input_layernorm.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.0.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.0.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.0.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.1.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.1.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.1.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.10.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.10.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.10.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.11.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.11.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.11.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.12.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.12.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.12.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.13.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.13.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.13.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.14.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.14.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.14.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.15.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.15.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.15.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.16.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.16.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.16.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.17.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.17.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.17.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.18.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.18.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.18.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.19.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.19.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.19.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.2.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.2.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.2.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.20.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.20.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.20.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.21.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.21.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.21.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.22.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.22.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.22.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.23.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.23.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.23.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.24.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.24.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.24.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.25.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.25.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.25.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.26.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.26.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.26.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.27.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.27.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.27.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.28.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.28.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.28.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.29.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.29.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.29.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.3.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.3.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.3.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.30.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.30.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.30.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.31.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.31.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.31.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.32.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.32.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.32.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.33.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.33.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.33.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.34.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.34.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.34.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.35.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.35.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.35.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.36.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.36.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.36.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.37.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.37.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.37.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.38.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.38.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.38.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.39.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.39.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.39.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.4.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.4.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.4.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.40.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.40.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.40.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.41.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.41.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.41.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.42.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.42.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.42.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.43.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.43.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.43.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.44.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.44.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.44.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.45.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.45.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.45.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.46.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.46.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.46.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.47.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.47.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.47.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.48.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.48.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.48.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.49.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.49.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.49.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.5.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.5.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.5.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.50.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.50.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.50.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.51.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.51.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.51.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.52.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.52.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.52.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.53.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.53.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.53.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.54.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.54.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.54.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.55.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.55.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.55.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.56.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.56.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.56.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.57.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.57.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.57.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.58.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.58.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.58.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.59.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.59.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.59.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.6.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.6.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.6.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.60.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.60.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.60.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.61.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.61.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.61.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.62.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.62.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.62.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.63.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.63.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.63.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.64.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.64.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.64.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.65.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.65.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.65.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.66.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.66.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.66.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.67.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.67.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.67.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.68.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.68.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.68.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.69.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.69.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.69.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.7.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.7.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.7.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.70.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.70.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.70.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.71.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.71.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.71.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.72.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.72.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.72.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.73.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.73.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.73.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.74.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.74.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.74.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.75.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.75.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.75.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.76.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.76.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.76.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.77.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.77.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.77.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.78.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.78.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.78.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.79.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.79.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.79.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.experts.8.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.8.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.8.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.9.down_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.9.gate_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.experts.9.up_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.mlp.gate.e_score_correction_bias": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.gate.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.shared_experts.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.shared_experts.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.mlp.shared_experts.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00044-of-00075.safetensors", + "model.layers.54.self_attn.k_norm.weight": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.k_proj.bias": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.q_norm.weight": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.q_proj.bias": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.v_proj.bias": "model-00043-of-00075.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00043-of-00075.safetensors", + "model.layers.55.input_layernorm.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.0.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.0.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.0.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.1.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.1.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.1.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.10.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.10.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.10.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.11.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.11.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.11.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.12.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.12.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.12.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.13.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.13.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.13.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.14.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.14.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.14.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.15.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.15.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.15.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.16.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.16.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.16.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.17.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.17.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.17.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.18.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.18.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.18.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.19.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.19.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.19.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.2.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.2.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.2.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.20.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.20.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.20.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.21.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.21.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.21.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.22.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.22.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.22.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.23.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.23.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.23.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.24.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.24.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.24.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.25.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.25.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.25.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.26.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.26.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.26.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.27.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.27.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.27.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.28.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.28.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.28.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.29.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.29.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.29.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.3.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.3.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.3.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.30.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.30.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.30.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.31.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.31.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.31.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.32.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.32.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.32.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.33.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.33.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.33.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.34.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.34.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.34.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.35.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.35.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.35.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.36.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.36.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.36.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.37.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.37.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.37.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.38.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.38.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.38.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.39.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.39.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.39.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.4.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.4.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.4.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.40.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.40.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.40.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.41.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.41.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.41.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.42.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.42.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.42.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.43.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.43.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.43.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.44.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.44.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.44.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.45.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.45.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.45.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.46.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.46.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.46.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.47.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.47.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.47.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.48.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.48.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.48.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.49.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.49.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.49.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.5.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.5.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.5.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.50.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.50.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.50.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.51.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.51.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.51.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.52.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.52.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.52.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.53.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.53.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.53.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.54.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.54.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.54.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.55.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.55.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.55.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.56.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.56.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.56.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.57.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.57.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.57.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.58.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.58.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.58.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.59.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.59.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.59.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.6.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.6.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.6.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.60.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.60.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.60.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.61.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.61.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.61.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.62.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.62.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.62.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.63.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.63.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.63.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.64.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.64.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.64.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.65.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.65.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.65.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.66.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.66.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.66.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.67.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.67.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.67.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.68.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.68.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.68.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.69.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.69.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.69.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.7.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.7.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.7.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.70.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.70.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.70.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.71.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.71.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.71.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.72.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.72.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.72.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.73.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.73.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.73.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.74.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.74.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.74.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.75.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.75.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.75.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.76.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.76.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.76.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.77.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.77.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.77.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.78.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.78.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.78.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.79.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.79.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.79.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.experts.8.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.8.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.8.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.9.down_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.9.gate_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.experts.9.up_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.mlp.gate.e_score_correction_bias": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.gate.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.shared_experts.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.shared_experts.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.mlp.shared_experts.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00045-of-00075.safetensors", + "model.layers.55.self_attn.k_norm.weight": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.k_proj.bias": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.q_norm.weight": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.q_proj.bias": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.v_proj.bias": "model-00044-of-00075.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00044-of-00075.safetensors", + "model.layers.56.input_layernorm.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.experts.0.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.0.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.0.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.1.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.1.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.1.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.10.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.10.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.10.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.11.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.11.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.11.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.12.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.12.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.12.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.13.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.13.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.13.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.14.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.14.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.14.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.15.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.15.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.15.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.16.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.16.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.16.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.17.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.17.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.17.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.18.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.18.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.18.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.19.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.19.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.19.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.2.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.2.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.2.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.20.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.20.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.20.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.21.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.21.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.21.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.22.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.22.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.22.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.23.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.23.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.23.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.24.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.24.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.24.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.25.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.25.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.25.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.26.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.26.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.26.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.27.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.27.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.27.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.28.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.28.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.28.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.29.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.29.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.29.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.3.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.3.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.3.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.30.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.30.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.30.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.31.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.31.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.31.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.32.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.32.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.32.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.33.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.33.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.33.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.34.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.34.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.34.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.35.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.35.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.35.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.36.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.36.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.36.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.37.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.37.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.37.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.38.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.38.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.38.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.39.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.39.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.39.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.4.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.4.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.4.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.40.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.40.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.40.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.41.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.41.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.41.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.42.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.42.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.42.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.43.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.43.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.43.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.44.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.44.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.44.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.45.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.45.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.45.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.46.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.46.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.46.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.47.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.47.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.47.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.48.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.48.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.48.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.49.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.49.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.49.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.5.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.5.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.5.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.50.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.50.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.50.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.51.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.51.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.51.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.52.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.52.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.52.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.53.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.53.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.53.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.54.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.54.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.54.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.55.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.55.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.55.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.56.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.56.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.56.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.57.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.57.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.57.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.58.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.58.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.58.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.59.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.59.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.59.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.6.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.6.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.6.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.60.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.60.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.60.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.61.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.61.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.61.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.62.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.62.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.62.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.63.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.63.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.63.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.64.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.64.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.64.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.65.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.65.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.65.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.66.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.66.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.66.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.67.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.67.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.67.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.68.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.68.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.68.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.69.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.69.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.69.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.7.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.7.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.7.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.70.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.70.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.70.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.71.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.71.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.71.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.72.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.72.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.72.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.73.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.73.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.73.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.74.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.74.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.74.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.75.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.75.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.75.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.76.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.76.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.76.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.77.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.77.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.77.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.78.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.experts.78.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.78.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.79.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.experts.79.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.experts.79.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.experts.8.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.8.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.8.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.9.down_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.9.gate_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.experts.9.up_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.mlp.gate.e_score_correction_bias": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.gate.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.shared_experts.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.shared_experts.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.mlp.shared_experts.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00046-of-00075.safetensors", + "model.layers.56.self_attn.k_norm.weight": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.k_proj.bias": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.q_norm.weight": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.q_proj.bias": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.v_proj.bias": "model-00045-of-00075.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00045-of-00075.safetensors", + "model.layers.57.input_layernorm.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.0.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.0.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.0.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.1.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.1.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.1.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.10.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.10.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.10.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.11.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.11.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.11.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.12.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.12.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.12.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.13.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.13.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.13.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.14.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.14.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.14.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.15.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.15.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.15.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.16.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.16.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.16.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.17.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.17.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.17.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.18.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.18.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.18.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.19.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.19.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.19.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.2.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.2.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.2.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.20.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.20.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.20.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.21.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.21.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.21.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.22.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.22.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.22.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.23.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.23.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.23.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.24.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.24.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.24.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.25.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.25.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.25.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.26.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.26.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.26.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.27.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.27.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.27.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.28.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.28.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.28.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.29.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.29.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.29.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.3.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.3.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.3.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.30.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.30.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.30.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.31.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.31.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.31.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.32.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.32.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.32.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.33.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.33.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.33.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.34.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.34.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.34.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.35.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.35.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.35.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.36.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.36.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.36.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.37.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.37.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.37.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.38.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.38.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.38.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.39.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.39.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.39.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.4.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.4.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.4.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.40.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.40.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.40.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.41.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.41.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.41.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.42.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.42.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.42.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.43.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.43.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.43.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.44.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.44.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.44.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.45.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.45.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.45.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.46.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.46.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.46.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.47.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.47.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.47.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.48.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.48.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.48.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.49.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.49.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.49.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.5.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.5.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.5.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.50.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.50.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.50.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.51.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.51.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.51.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.52.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.52.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.52.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.53.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.53.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.53.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.54.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.54.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.54.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.55.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.55.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.55.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.56.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.56.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.56.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.57.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.57.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.57.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.58.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.58.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.58.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.59.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.59.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.59.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.6.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.6.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.6.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.60.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.60.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.60.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.61.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.61.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.61.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.62.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.62.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.62.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.63.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.63.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.63.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.64.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.64.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.64.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.65.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.65.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.65.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.66.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.66.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.66.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.67.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.67.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.67.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.68.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.68.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.68.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.69.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.69.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.69.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.7.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.7.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.7.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.70.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.70.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.70.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.71.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.71.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.71.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.72.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.72.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.72.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.73.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.73.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.73.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.74.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.74.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.74.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.75.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.75.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.75.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.76.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.76.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.76.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.77.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.77.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.77.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.78.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.78.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.78.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.79.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.79.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.79.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.8.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.8.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.8.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.9.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.9.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.experts.9.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.gate.e_score_correction_bias": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.gate.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.shared_experts.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.shared_experts.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.mlp.shared_experts.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.k_norm.weight": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.k_proj.bias": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.q_norm.weight": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.q_proj.bias": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.v_proj.bias": "model-00046-of-00075.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.input_layernorm.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.0.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.0.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.0.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.1.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.1.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.1.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.10.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.10.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.10.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.11.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.11.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.11.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.12.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.12.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.12.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.13.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.13.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.13.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.14.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.14.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.14.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.15.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.15.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.15.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.16.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.16.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.16.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.17.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.17.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.17.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.18.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.18.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.18.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.19.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.19.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.19.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.2.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.2.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.2.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.20.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.20.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.20.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.21.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.21.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.21.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.22.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.22.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.22.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.23.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.23.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.23.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.24.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.24.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.24.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.25.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.25.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.25.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.26.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.26.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.26.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.27.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.27.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.27.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.28.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.28.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.28.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.29.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.29.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.29.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.3.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.3.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.3.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.30.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.30.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.30.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.31.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.31.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.31.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.32.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.32.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.32.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.33.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.33.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.33.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.34.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.34.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.34.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.35.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.35.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.35.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.36.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.36.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.36.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.37.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.37.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.37.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.38.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.38.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.38.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.39.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.39.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.39.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.4.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.4.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.4.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.40.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.40.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.40.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.41.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.41.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.41.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.42.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.42.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.42.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.43.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.43.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.43.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.44.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.44.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.44.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.45.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.45.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.45.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.46.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.46.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.46.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.47.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.47.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.47.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.48.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.48.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.48.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.49.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.49.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.49.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.5.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.5.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.5.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.50.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.50.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.50.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.51.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.51.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.51.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.52.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.52.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.52.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.53.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.53.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.53.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.54.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.54.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.54.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.55.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.55.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.55.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.56.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.56.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.56.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.57.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.57.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.57.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.58.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.58.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.58.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.59.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.59.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.59.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.6.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.6.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.6.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.60.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.60.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.60.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.61.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.61.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.61.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.62.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.62.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.62.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.63.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.63.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.63.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.64.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.64.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.64.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.65.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.65.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.65.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.66.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.66.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.66.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.67.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.67.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.67.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.68.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.68.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.68.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.69.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.69.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.69.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.7.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.7.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.7.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.70.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.70.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.70.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.71.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.71.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.71.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.72.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.72.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.72.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.73.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.73.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.73.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.74.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.74.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.74.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.75.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.75.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.75.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.76.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.76.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.76.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.77.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.77.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.77.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.78.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.78.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.78.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.79.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.79.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.79.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.experts.8.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.8.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.8.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.9.down_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.9.gate_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.experts.9.up_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.mlp.gate.e_score_correction_bias": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.gate.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.shared_experts.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.shared_experts.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.mlp.shared_experts.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00047-of-00075.safetensors", + "model.layers.58.self_attn.k_norm.weight": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.k_proj.bias": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.q_norm.weight": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.q_proj.bias": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.v_proj.bias": "model-00046-of-00075.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00046-of-00075.safetensors", + "model.layers.59.input_layernorm.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.0.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.0.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.0.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.1.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.1.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.1.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.10.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.10.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.10.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.11.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.11.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.11.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.12.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.12.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.12.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.13.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.13.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.13.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.14.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.14.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.14.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.15.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.15.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.15.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.16.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.16.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.16.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.17.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.17.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.17.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.18.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.18.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.18.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.19.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.19.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.19.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.2.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.2.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.2.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.20.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.20.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.20.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.21.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.21.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.21.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.22.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.22.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.22.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.23.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.23.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.23.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.24.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.24.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.24.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.25.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.25.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.25.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.26.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.26.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.26.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.27.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.27.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.27.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.28.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.28.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.28.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.29.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.29.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.29.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.3.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.3.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.3.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.30.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.30.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.30.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.31.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.31.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.31.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.32.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.32.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.32.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.33.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.33.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.33.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.34.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.34.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.34.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.35.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.35.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.35.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.36.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.36.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.36.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.37.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.37.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.37.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.38.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.38.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.38.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.39.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.39.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.39.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.4.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.4.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.4.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.40.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.40.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.40.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.41.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.41.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.41.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.42.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.42.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.42.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.43.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.43.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.43.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.44.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.44.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.44.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.45.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.45.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.45.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.46.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.46.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.46.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.47.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.47.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.47.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.48.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.48.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.48.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.49.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.49.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.49.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.5.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.5.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.5.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.50.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.50.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.50.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.51.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.51.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.51.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.52.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.52.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.52.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.53.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.53.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.53.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.54.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.54.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.54.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.55.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.55.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.55.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.56.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.56.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.56.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.57.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.57.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.57.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.58.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.58.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.58.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.59.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.59.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.59.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.6.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.6.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.6.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.60.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.60.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.60.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.61.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.61.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.61.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.62.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.62.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.62.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.63.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.63.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.63.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.64.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.64.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.64.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.65.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.65.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.65.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.66.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.66.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.66.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.67.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.67.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.67.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.68.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.68.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.68.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.69.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.69.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.69.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.7.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.7.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.7.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.70.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.70.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.70.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.71.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.71.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.71.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.72.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.72.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.72.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.73.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.73.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.73.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.74.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.74.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.74.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.75.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.75.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.75.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.76.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.76.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.76.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.77.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.77.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.77.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.78.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.78.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.78.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.79.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.79.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.79.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.experts.8.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.8.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.8.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.9.down_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.9.gate_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.experts.9.up_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.mlp.gate.e_score_correction_bias": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.gate.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.shared_experts.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.shared_experts.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.mlp.shared_experts.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00048-of-00075.safetensors", + "model.layers.59.self_attn.k_norm.weight": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.k_proj.bias": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.q_norm.weight": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.q_proj.bias": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.v_proj.bias": "model-00047-of-00075.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00047-of-00075.safetensors", + "model.layers.6.input_layernorm.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.0.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.0.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.0.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.1.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.1.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.1.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.10.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.10.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.10.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.11.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.11.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.11.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.12.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.12.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.12.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.13.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.13.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.13.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.14.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.14.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.14.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.15.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.15.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.15.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.16.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.16.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.16.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.17.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.17.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.17.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.18.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.18.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.18.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.19.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.19.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.19.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.2.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.2.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.2.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.20.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.20.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.20.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.21.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.21.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.21.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.22.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.22.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.22.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.23.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.23.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.23.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.24.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.24.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.24.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.25.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.25.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.25.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.26.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.26.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.26.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.27.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.27.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.27.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.28.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.28.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.28.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.29.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.29.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.29.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.3.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.3.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.3.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.30.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.30.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.30.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.31.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.31.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.31.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.32.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.32.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.32.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.33.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.33.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.33.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.34.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.34.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.34.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.35.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.35.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.35.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.36.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.36.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.36.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.37.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.37.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.37.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.38.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.38.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.38.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.39.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.39.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.39.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.4.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.4.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.4.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.40.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.40.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.40.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.41.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.41.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.41.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.42.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.42.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.42.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.43.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.43.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.43.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.44.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.44.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.44.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.45.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.45.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.45.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.46.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.46.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.46.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.47.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.47.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.47.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.48.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.48.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.48.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.49.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.49.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.49.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.5.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.5.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.5.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.50.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.50.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.50.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.51.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.51.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.51.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.52.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.52.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.52.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.53.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.53.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.53.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.54.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.54.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.54.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.55.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.55.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.55.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.56.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.56.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.56.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.57.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.57.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.57.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.58.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.58.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.58.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.59.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.59.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.59.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.6.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.6.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.6.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.60.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.60.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.60.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.61.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.61.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.61.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.62.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.62.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.62.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.63.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.63.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.63.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.64.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.64.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.64.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.65.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.65.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.65.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.66.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.66.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.66.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.67.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.67.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.67.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.68.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.68.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.68.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.69.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.69.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.69.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.7.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.7.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.7.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.70.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.70.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.70.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.71.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.71.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.71.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.72.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.72.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.72.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.73.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.73.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.73.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.74.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.74.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.74.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.75.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.75.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.75.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.76.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.76.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.76.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.77.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.77.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.77.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.78.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.78.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.78.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.79.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.79.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.79.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.8.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.8.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.8.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.9.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.9.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.experts.9.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.gate.e_score_correction_bias": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.gate.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.shared_experts.down_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.shared_experts.gate_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.mlp.shared_experts.up_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.k_norm.weight": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.k_proj.bias": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.q_norm.weight": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.q_proj.bias": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.v_proj.bias": "model-00004-of-00075.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00004-of-00075.safetensors", + "model.layers.60.input_layernorm.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.0.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.0.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.0.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.1.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.1.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.1.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.10.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.10.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.10.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.11.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.11.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.11.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.12.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.12.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.12.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.13.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.13.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.13.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.14.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.14.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.14.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.15.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.15.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.15.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.16.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.16.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.16.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.17.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.17.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.17.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.18.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.18.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.18.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.19.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.19.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.19.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.2.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.2.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.2.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.20.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.20.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.20.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.21.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.21.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.21.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.22.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.22.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.22.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.23.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.23.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.23.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.24.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.24.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.24.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.25.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.25.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.25.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.26.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.26.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.26.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.27.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.27.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.27.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.28.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.28.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.28.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.29.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.29.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.29.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.3.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.3.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.3.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.30.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.30.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.30.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.31.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.31.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.31.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.32.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.32.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.32.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.33.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.33.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.33.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.34.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.34.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.34.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.35.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.35.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.35.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.36.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.36.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.36.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.37.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.37.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.37.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.38.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.38.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.38.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.39.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.39.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.39.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.4.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.4.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.4.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.40.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.40.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.40.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.41.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.41.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.41.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.42.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.42.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.42.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.43.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.43.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.43.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.44.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.44.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.44.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.45.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.45.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.45.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.46.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.46.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.46.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.47.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.47.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.47.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.48.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.48.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.48.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.49.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.49.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.49.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.5.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.5.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.5.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.50.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.50.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.50.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.51.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.51.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.51.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.52.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.52.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.52.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.53.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.53.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.53.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.54.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.54.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.54.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.55.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.55.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.55.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.56.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.56.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.56.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.57.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.57.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.57.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.58.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.58.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.58.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.59.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.59.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.59.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.6.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.6.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.6.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.60.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.60.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.60.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.61.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.61.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.61.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.62.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.62.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.62.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.63.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.63.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.63.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.64.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.64.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.64.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.65.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.65.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.65.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.66.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.66.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.66.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.67.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.67.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.67.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.68.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.68.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.68.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.69.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.69.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.69.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.7.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.7.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.7.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.70.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.70.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.70.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.71.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.71.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.71.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.72.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.72.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.72.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.73.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.73.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.73.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.74.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.74.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.74.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.75.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.75.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.75.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.76.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.76.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.76.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.77.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.77.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.77.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.78.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.78.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.78.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.79.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.79.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.79.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.experts.8.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.8.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.8.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.9.down_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.9.gate_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.experts.9.up_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.mlp.gate.e_score_correction_bias": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.gate.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.shared_experts.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.shared_experts.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.mlp.shared_experts.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00049-of-00075.safetensors", + "model.layers.60.self_attn.k_norm.weight": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.k_proj.bias": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.q_norm.weight": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.q_proj.bias": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.v_proj.bias": "model-00048-of-00075.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00048-of-00075.safetensors", + "model.layers.61.input_layernorm.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.0.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.0.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.0.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.1.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.1.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.1.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.10.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.10.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.10.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.11.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.11.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.11.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.12.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.12.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.12.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.13.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.13.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.13.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.14.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.14.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.14.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.15.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.15.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.15.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.16.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.16.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.16.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.17.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.17.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.17.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.18.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.18.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.18.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.19.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.19.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.19.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.2.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.2.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.2.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.20.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.20.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.20.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.21.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.21.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.21.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.22.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.22.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.22.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.23.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.23.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.23.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.24.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.24.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.24.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.25.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.25.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.25.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.26.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.26.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.26.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.27.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.27.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.27.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.28.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.28.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.28.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.29.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.29.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.29.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.3.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.3.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.3.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.30.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.30.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.30.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.31.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.31.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.31.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.32.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.32.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.32.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.33.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.33.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.33.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.34.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.34.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.34.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.35.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.35.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.35.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.36.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.36.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.36.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.37.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.37.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.37.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.38.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.38.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.38.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.39.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.39.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.39.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.4.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.4.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.4.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.40.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.40.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.40.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.41.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.41.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.41.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.42.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.42.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.42.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.43.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.43.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.43.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.44.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.44.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.44.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.45.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.45.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.45.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.46.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.46.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.46.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.47.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.47.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.47.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.48.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.48.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.48.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.49.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.49.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.49.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.5.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.5.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.5.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.50.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.50.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.50.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.51.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.51.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.51.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.52.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.52.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.52.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.53.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.53.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.53.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.54.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.54.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.54.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.55.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.55.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.55.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.56.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.56.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.56.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.57.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.57.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.57.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.58.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.58.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.58.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.59.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.59.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.59.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.6.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.6.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.6.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.60.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.60.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.60.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.61.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.61.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.61.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.62.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.62.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.62.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.63.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.63.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.63.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.64.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.64.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.64.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.65.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.65.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.65.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.66.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.66.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.66.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.67.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.67.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.67.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.68.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.68.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.68.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.69.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.69.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.69.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.7.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.7.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.7.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.70.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.70.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.70.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.71.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.71.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.71.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.72.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.72.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.72.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.73.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.73.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.73.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.74.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.74.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.74.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.75.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.75.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.75.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.76.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.76.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.76.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.77.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.77.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.77.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.78.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.78.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.78.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.79.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.79.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.79.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.experts.8.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.8.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.8.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.9.down_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.9.gate_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.experts.9.up_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.mlp.gate.e_score_correction_bias": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.gate.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.shared_experts.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.shared_experts.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.mlp.shared_experts.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00050-of-00075.safetensors", + "model.layers.61.self_attn.k_norm.weight": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.k_proj.bias": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.q_norm.weight": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.q_proj.bias": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.v_proj.bias": "model-00049-of-00075.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00049-of-00075.safetensors", + "model.layers.62.input_layernorm.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.0.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.0.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.0.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.1.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.1.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.1.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.10.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.10.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.10.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.11.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.11.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.11.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.12.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.12.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.12.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.13.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.13.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.13.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.14.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.14.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.14.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.15.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.15.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.15.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.16.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.16.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.16.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.17.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.17.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.17.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.18.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.18.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.18.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.19.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.19.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.19.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.2.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.2.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.2.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.20.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.20.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.20.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.21.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.21.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.21.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.22.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.22.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.22.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.23.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.23.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.23.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.24.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.24.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.24.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.25.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.25.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.25.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.26.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.26.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.26.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.27.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.27.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.27.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.28.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.28.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.28.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.29.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.29.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.29.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.3.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.3.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.3.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.30.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.30.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.30.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.31.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.31.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.31.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.32.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.32.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.32.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.33.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.33.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.33.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.34.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.34.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.34.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.35.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.35.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.35.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.36.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.36.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.36.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.37.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.37.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.37.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.38.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.38.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.38.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.39.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.39.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.39.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.4.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.4.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.4.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.40.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.40.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.40.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.41.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.41.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.41.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.42.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.42.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.42.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.43.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.43.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.43.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.44.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.44.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.44.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.45.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.45.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.45.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.46.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.46.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.46.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.47.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.47.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.47.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.48.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.48.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.48.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.49.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.49.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.49.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.5.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.5.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.5.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.50.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.50.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.50.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.51.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.51.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.51.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.52.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.52.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.52.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.53.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.53.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.53.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.54.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.54.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.54.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.55.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.55.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.55.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.56.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.56.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.56.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.57.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.57.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.57.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.58.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.58.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.58.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.59.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.59.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.59.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.6.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.6.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.6.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.60.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.60.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.60.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.61.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.61.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.61.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.62.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.62.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.62.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.63.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.63.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.63.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.64.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.64.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.64.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.65.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.65.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.65.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.66.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.66.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.66.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.67.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.67.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.67.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.68.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.68.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.68.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.69.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.69.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.69.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.7.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.7.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.7.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.70.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.70.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.70.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.71.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.71.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.71.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.72.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.72.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.72.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.73.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.73.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.73.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.74.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.74.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.74.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.75.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.75.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.75.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.76.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.76.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.76.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.77.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.77.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.77.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.78.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.78.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.78.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.79.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.79.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.79.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.8.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.8.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.8.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.9.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.9.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.experts.9.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.gate.e_score_correction_bias": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.gate.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.shared_experts.down_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.shared_experts.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.mlp.shared_experts.up_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.k_norm.weight": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.k_proj.bias": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.q_norm.weight": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.q_proj.bias": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.v_proj.bias": "model-00050-of-00075.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.63.input_layernorm.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.0.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.0.gate_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.63.mlp.experts.0.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.1.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.1.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.1.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.10.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.10.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.10.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.11.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.11.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.11.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.12.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.12.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.12.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.13.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.13.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.13.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.14.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.14.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.14.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.15.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.15.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.15.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.16.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.16.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.16.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.17.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.17.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.17.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.18.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.18.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.18.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.19.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.19.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.19.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.2.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.2.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.2.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.20.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.20.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.20.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.21.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.21.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.21.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.22.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.22.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.22.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.23.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.23.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.23.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.24.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.24.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.24.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.25.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.25.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.25.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.26.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.26.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.26.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.27.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.27.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.27.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.28.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.28.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.28.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.29.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.29.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.29.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.3.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.3.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.3.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.30.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.30.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.30.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.31.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.31.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.31.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.32.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.32.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.32.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.33.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.33.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.33.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.34.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.34.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.34.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.35.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.35.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.35.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.36.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.36.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.36.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.37.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.37.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.37.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.38.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.38.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.38.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.39.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.39.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.39.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.4.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.4.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.4.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.40.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.40.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.40.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.41.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.41.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.41.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.42.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.42.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.42.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.43.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.43.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.43.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.44.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.44.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.44.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.45.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.45.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.45.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.46.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.46.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.46.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.47.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.47.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.47.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.48.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.48.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.48.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.49.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.49.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.49.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.5.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.5.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.5.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.50.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.50.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.50.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.51.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.51.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.51.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.52.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.52.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.52.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.53.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.53.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.53.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.54.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.54.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.54.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.55.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.55.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.55.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.56.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.56.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.56.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.57.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.57.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.57.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.58.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.58.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.58.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.59.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.59.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.59.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.6.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.6.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.6.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.60.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.60.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.60.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.61.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.61.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.61.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.62.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.62.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.62.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.63.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.63.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.63.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.64.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.64.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.64.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.65.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.65.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.65.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.66.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.66.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.66.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.67.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.67.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.67.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.68.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.68.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.68.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.69.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.69.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.69.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.7.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.7.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.7.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.70.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.70.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.70.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.71.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.71.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.71.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.72.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.72.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.72.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.73.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.73.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.73.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.74.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.74.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.74.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.75.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.75.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.75.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.76.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.76.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.76.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.77.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.77.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.77.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.78.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.78.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.78.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.79.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.79.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.79.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.8.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.8.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.8.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.9.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.9.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.experts.9.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.gate.e_score_correction_bias": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.gate.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.shared_experts.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.shared_experts.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.mlp.shared_experts.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00051-of-00075.safetensors", + "model.layers.63.self_attn.k_norm.weight": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.k_proj.bias": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.q_norm.weight": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.q_proj.bias": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.v_proj.bias": "model-00050-of-00075.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00050-of-00075.safetensors", + "model.layers.64.input_layernorm.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.0.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.0.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.0.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.1.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.1.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.1.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.10.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.10.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.10.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.11.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.11.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.11.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.12.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.12.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.12.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.13.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.13.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.13.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.14.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.14.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.14.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.15.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.15.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.15.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.16.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.16.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.16.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.17.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.17.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.17.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.18.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.18.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.18.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.19.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.19.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.19.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.2.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.2.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.2.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.20.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.20.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.20.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.21.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.21.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.21.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.22.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.22.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.22.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.23.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.23.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.23.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.24.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.24.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.24.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.25.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.25.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.25.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.26.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.26.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.26.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.27.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.27.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.27.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.28.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.28.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.28.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.29.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.29.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.29.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.3.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.3.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.3.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.30.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.30.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.30.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.31.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.31.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.31.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.32.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.32.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.32.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.33.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.33.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.33.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.34.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.34.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.34.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.35.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.35.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.35.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.36.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.36.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.36.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.37.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.37.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.37.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.38.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.38.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.38.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.39.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.39.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.39.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.4.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.4.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.4.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.40.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.40.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.40.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.41.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.41.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.41.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.42.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.42.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.42.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.43.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.43.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.43.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.44.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.44.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.44.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.45.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.45.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.45.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.46.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.46.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.46.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.47.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.47.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.47.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.48.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.48.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.48.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.49.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.49.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.49.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.5.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.5.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.5.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.50.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.50.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.50.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.51.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.51.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.51.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.52.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.52.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.52.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.53.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.53.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.53.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.54.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.54.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.54.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.55.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.55.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.55.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.56.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.56.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.56.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.57.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.57.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.57.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.58.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.58.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.58.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.59.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.59.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.59.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.6.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.6.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.6.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.60.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.60.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.60.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.61.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.61.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.61.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.62.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.62.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.62.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.63.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.63.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.63.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.64.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.64.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.64.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.65.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.65.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.65.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.66.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.66.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.66.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.67.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.67.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.67.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.68.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.68.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.68.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.69.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.69.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.69.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.7.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.7.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.7.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.70.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.70.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.70.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.71.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.71.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.71.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.72.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.72.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.72.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.73.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.73.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.73.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.74.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.74.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.74.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.75.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.75.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.75.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.76.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.76.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.76.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.77.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.77.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.77.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.78.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.78.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.78.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.79.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.79.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.79.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.experts.8.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.8.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.8.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.9.down_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.9.gate_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.experts.9.up_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.mlp.gate.e_score_correction_bias": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.gate.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.shared_experts.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.shared_experts.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.mlp.shared_experts.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00052-of-00075.safetensors", + "model.layers.64.self_attn.k_norm.weight": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.k_proj.bias": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.k_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.q_norm.weight": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.q_proj.bias": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.v_proj.bias": "model-00051-of-00075.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00051-of-00075.safetensors", + "model.layers.65.input_layernorm.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.0.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.0.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.0.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.1.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.1.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.1.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.10.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.10.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.10.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.11.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.11.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.11.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.12.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.12.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.12.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.13.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.13.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.13.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.14.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.14.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.14.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.15.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.15.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.15.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.16.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.16.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.16.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.17.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.17.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.17.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.18.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.18.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.18.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.19.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.19.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.19.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.2.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.2.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.2.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.20.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.20.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.20.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.21.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.21.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.21.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.22.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.22.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.22.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.23.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.23.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.23.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.24.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.24.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.24.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.25.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.25.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.25.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.26.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.26.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.26.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.27.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.27.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.27.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.28.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.28.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.28.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.29.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.29.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.29.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.3.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.3.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.3.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.30.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.30.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.30.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.31.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.31.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.31.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.32.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.32.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.32.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.33.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.33.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.33.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.34.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.34.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.34.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.35.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.35.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.35.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.36.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.36.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.36.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.37.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.37.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.37.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.38.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.38.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.38.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.39.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.39.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.39.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.4.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.4.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.4.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.40.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.40.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.40.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.41.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.41.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.41.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.42.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.42.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.42.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.43.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.43.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.43.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.44.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.44.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.44.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.45.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.45.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.45.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.46.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.46.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.46.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.47.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.47.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.47.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.48.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.48.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.48.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.49.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.49.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.49.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.5.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.5.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.5.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.50.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.50.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.50.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.51.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.51.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.51.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.52.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.52.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.52.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.53.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.53.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.53.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.54.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.54.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.54.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.55.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.55.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.55.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.56.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.56.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.56.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.57.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.57.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.57.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.58.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.58.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.58.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.59.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.59.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.59.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.6.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.6.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.6.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.60.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.60.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.60.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.61.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.61.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.61.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.62.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.62.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.62.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.63.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.63.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.63.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.64.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.64.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.64.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.65.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.65.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.65.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.66.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.66.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.66.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.67.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.67.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.67.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.68.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.68.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.68.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.69.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.69.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.69.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.7.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.7.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.7.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.70.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.70.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.70.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.71.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.71.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.71.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.72.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.72.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.72.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.73.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.73.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.73.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.74.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.74.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.74.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.75.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.75.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.75.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.76.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.76.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.76.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.77.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.77.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.77.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.78.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.78.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.78.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.79.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.79.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.79.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.experts.8.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.8.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.8.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.9.down_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.9.gate_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.experts.9.up_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.mlp.gate.e_score_correction_bias": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.gate.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.shared_experts.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.shared_experts.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.mlp.shared_experts.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00053-of-00075.safetensors", + "model.layers.65.self_attn.k_norm.weight": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.k_proj.bias": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.q_norm.weight": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.q_proj.bias": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.v_proj.bias": "model-00052-of-00075.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00052-of-00075.safetensors", + "model.layers.66.input_layernorm.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.0.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.0.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.0.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.1.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.1.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.1.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.10.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.10.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.10.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.11.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.11.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.11.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.12.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.12.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.12.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.13.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.13.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.13.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.14.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.14.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.14.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.15.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.15.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.15.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.16.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.16.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.16.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.17.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.17.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.17.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.18.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.18.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.18.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.19.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.19.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.19.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.2.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.2.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.2.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.20.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.20.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.20.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.21.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.21.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.21.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.22.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.22.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.22.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.23.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.23.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.23.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.24.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.24.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.24.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.25.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.25.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.25.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.26.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.26.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.26.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.27.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.27.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.27.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.28.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.28.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.28.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.29.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.29.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.29.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.3.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.3.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.3.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.30.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.30.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.30.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.31.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.31.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.31.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.32.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.32.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.32.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.33.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.33.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.33.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.34.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.34.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.34.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.35.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.35.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.35.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.36.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.36.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.36.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.37.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.37.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.37.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.38.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.38.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.38.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.39.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.39.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.39.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.4.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.4.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.4.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.40.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.40.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.40.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.41.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.41.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.41.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.42.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.42.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.42.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.43.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.43.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.43.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.44.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.44.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.44.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.45.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.45.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.45.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.46.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.46.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.46.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.47.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.47.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.47.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.48.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.48.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.48.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.49.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.49.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.49.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.5.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.5.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.5.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.50.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.50.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.50.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.51.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.51.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.51.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.52.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.52.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.52.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.53.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.53.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.53.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.54.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.54.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.54.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.55.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.55.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.55.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.56.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.56.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.56.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.57.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.57.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.57.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.58.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.58.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.58.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.59.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.59.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.59.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.6.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.6.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.6.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.60.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.60.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.60.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.61.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.61.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.61.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.62.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.62.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.62.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.63.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.63.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.63.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.64.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.64.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.64.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.65.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.65.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.65.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.66.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.66.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.66.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.67.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.67.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.67.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.68.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.68.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.68.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.69.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.69.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.69.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.7.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.7.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.7.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.70.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.70.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.70.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.71.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.71.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.71.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.72.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.72.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.72.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.73.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.73.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.73.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.74.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.74.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.74.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.75.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.75.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.75.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.76.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.76.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.76.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.77.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.77.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.77.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.78.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.78.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.78.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.79.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.79.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.79.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.experts.8.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.8.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.8.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.9.down_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.9.gate_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.experts.9.up_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.mlp.gate.e_score_correction_bias": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.gate.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.shared_experts.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.shared_experts.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.mlp.shared_experts.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00054-of-00075.safetensors", + "model.layers.66.self_attn.k_norm.weight": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.k_proj.bias": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.q_norm.weight": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.q_proj.bias": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.v_proj.bias": "model-00053-of-00075.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00053-of-00075.safetensors", + "model.layers.67.input_layernorm.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.0.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.0.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.0.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.1.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.1.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.1.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.10.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.10.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.10.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.11.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.11.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.11.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.12.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.12.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.12.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.13.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.13.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.13.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.14.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.14.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.14.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.15.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.15.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.15.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.16.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.16.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.16.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.17.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.17.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.17.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.18.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.18.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.18.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.19.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.19.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.19.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.2.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.2.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.2.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.20.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.20.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.20.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.21.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.21.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.21.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.22.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.22.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.22.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.23.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.23.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.23.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.24.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.24.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.24.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.25.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.25.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.25.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.26.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.26.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.26.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.27.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.27.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.27.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.28.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.28.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.28.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.29.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.29.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.29.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.3.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.3.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.3.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.30.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.30.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.30.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.31.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.31.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.31.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.32.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.32.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.32.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.33.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.33.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.33.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.34.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.34.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.34.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.35.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.35.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.35.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.36.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.36.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.36.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.37.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.37.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.37.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.38.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.38.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.38.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.39.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.39.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.39.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.4.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.4.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.4.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.40.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.40.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.40.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.41.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.41.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.41.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.42.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.42.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.42.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.43.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.43.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.43.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.44.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.44.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.44.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.45.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.45.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.45.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.46.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.46.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.46.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.47.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.47.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.47.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.48.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.48.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.48.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.49.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.49.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.49.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.5.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.5.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.5.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.50.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.50.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.50.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.51.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.51.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.51.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.52.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.52.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.52.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.53.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.53.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.53.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.54.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.54.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.54.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.55.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.55.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.55.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.56.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.56.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.56.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.57.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.57.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.57.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.58.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.58.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.58.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.59.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.59.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.59.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.6.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.6.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.6.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.60.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.60.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.60.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.61.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.61.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.61.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.62.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.62.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.62.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.63.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.63.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.63.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.64.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.64.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.64.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.65.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.65.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.65.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.66.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.66.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.66.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.67.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.67.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.67.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.68.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.68.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.68.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.69.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.69.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.69.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.7.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.7.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.7.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.70.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.70.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.70.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.71.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.71.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.71.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.72.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.72.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.72.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.73.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.73.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.73.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.74.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.74.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.74.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.75.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.75.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.75.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.76.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.76.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.76.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.77.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.77.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.77.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.78.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.78.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.78.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.79.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.79.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.79.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.experts.8.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.8.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.8.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.9.down_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.9.gate_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.experts.9.up_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.mlp.gate.e_score_correction_bias": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.gate.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.shared_experts.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.shared_experts.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.mlp.shared_experts.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00055-of-00075.safetensors", + "model.layers.67.self_attn.k_norm.weight": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.k_proj.bias": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.q_norm.weight": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.q_proj.bias": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.v_proj.bias": "model-00054-of-00075.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00054-of-00075.safetensors", + "model.layers.68.input_layernorm.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.0.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.0.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.0.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.1.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.1.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.1.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.10.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.10.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.10.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.11.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.11.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.11.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.12.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.12.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.12.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.13.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.13.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.13.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.14.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.14.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.14.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.15.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.15.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.15.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.16.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.16.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.16.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.17.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.17.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.17.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.18.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.18.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.18.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.19.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.19.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.19.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.2.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.2.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.2.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.20.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.20.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.20.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.21.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.21.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.21.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.22.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.22.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.22.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.23.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.23.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.23.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.24.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.24.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.24.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.25.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.25.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.25.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.26.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.26.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.26.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.27.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.27.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.27.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.28.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.28.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.28.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.29.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.29.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.29.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.3.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.3.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.3.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.30.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.30.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.30.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.31.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.31.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.31.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.32.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.32.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.32.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.33.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.33.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.33.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.34.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.34.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.34.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.35.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.35.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.35.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.36.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.36.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.36.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.37.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.37.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.37.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.38.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.38.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.38.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.39.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.39.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.39.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.4.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.4.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.4.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.40.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.40.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.40.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.41.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.41.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.41.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.42.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.42.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.42.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.43.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.43.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.43.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.44.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.44.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.44.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.45.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.45.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.45.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.46.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.46.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.46.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.47.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.47.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.47.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.48.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.48.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.48.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.49.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.49.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.49.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.5.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.5.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.5.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.50.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.50.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.50.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.51.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.51.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.51.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.52.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.52.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.52.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.53.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.53.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.53.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.54.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.54.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.54.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.55.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.55.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.55.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.56.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.56.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.56.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.57.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.57.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.57.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.58.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.58.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.58.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.59.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.59.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.59.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.6.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.6.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.6.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.60.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.60.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.60.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.61.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.61.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.61.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.62.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.62.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.62.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.63.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.63.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.63.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.64.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.64.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.64.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.65.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.65.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.65.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.66.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.66.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.66.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.67.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.67.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.67.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.68.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.68.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.68.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.69.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.69.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.69.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.7.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.7.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.7.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.70.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.70.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.70.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.71.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.71.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.71.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.72.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.72.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.72.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.73.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.73.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.73.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.74.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.74.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.74.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.75.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.75.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.75.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.76.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.76.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.76.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.77.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.77.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.77.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.78.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.78.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.78.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.79.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.79.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.79.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.8.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.8.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.8.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.9.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.9.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.experts.9.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.gate.e_score_correction_bias": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.gate.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.shared_experts.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.shared_experts.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.mlp.shared_experts.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.k_norm.weight": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.k_proj.bias": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.q_norm.weight": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.q_proj.bias": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.v_proj.bias": "model-00055-of-00075.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.input_layernorm.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.0.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.0.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.0.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.1.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.1.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.1.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.10.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.10.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.10.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.11.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.11.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.11.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.12.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.12.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.12.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.13.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.13.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.13.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.14.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.14.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.14.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.15.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.15.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.15.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.16.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.16.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.16.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.17.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.17.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.17.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.18.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.18.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.18.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.19.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.19.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.19.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.2.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.2.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.2.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.20.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.20.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.20.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.21.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.21.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.21.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.22.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.22.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.22.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.23.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.23.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.23.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.24.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.24.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.24.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.25.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.25.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.25.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.26.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.26.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.26.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.27.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.27.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.27.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.28.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.28.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.28.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.29.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.29.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.29.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.3.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.3.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.3.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.30.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.30.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.30.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.31.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.31.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.31.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.32.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.32.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.32.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.33.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.33.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.33.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.34.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.34.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.34.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.35.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.35.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.35.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.36.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.36.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.36.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.37.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.37.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.37.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.38.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.38.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.38.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.39.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.39.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.39.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.4.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.4.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.4.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.40.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.40.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.40.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.41.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.41.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.41.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.42.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.42.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.42.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.43.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.43.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.43.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.44.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.44.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.44.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.45.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.45.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.45.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.46.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.46.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.46.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.47.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.47.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.47.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.48.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.48.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.48.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.49.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.49.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.49.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.5.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.5.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.5.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.50.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.50.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.50.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.51.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.51.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.51.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.52.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.52.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.52.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.53.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.53.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.53.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.54.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.54.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.54.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.55.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.55.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.55.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.56.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.56.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.56.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.57.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.57.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.57.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.58.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.58.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.58.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.59.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.59.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.59.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.6.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.6.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.6.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.60.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.60.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.60.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.61.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.61.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.61.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.62.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.62.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.62.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.63.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.63.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.63.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.64.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.64.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.64.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.65.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.65.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.65.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.66.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.66.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.66.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.67.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.67.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.67.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.68.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.68.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.68.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.69.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.69.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.69.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.7.down_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.7.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.7.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.70.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.70.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.70.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.71.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.71.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.71.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.72.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.72.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.72.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.73.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.73.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.73.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.74.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.74.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.74.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.75.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.75.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.75.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.76.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.76.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.76.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.77.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.77.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.77.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.78.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.78.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.78.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.79.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.79.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.79.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.8.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.8.gate_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.8.up_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.mlp.experts.9.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.9.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.experts.9.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.gate.e_score_correction_bias": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.gate.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.shared_experts.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.shared_experts.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.mlp.shared_experts.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00056-of-00075.safetensors", + "model.layers.69.self_attn.k_norm.weight": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.k_proj.bias": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.q_norm.weight": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.q_proj.bias": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.v_proj.bias": "model-00055-of-00075.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00055-of-00075.safetensors", + "model.layers.7.input_layernorm.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.0.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.0.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.0.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.1.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.1.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.1.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.10.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.10.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.10.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.11.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.11.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.11.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.12.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.12.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.12.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.13.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.13.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.13.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.14.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.14.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.14.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.15.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.15.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.15.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.16.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.16.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.16.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.17.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.17.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.17.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.18.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.18.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.18.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.19.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.19.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.19.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.2.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.2.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.2.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.20.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.20.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.20.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.21.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.21.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.21.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.22.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.22.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.22.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.23.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.23.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.23.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.24.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.24.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.24.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.25.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.25.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.25.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.26.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.26.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.26.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.27.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.27.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.27.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.28.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.28.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.28.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.29.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.29.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.29.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.3.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.3.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.3.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.30.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.30.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.30.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.31.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.31.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.31.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.32.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.32.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.32.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.33.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.33.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.33.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.34.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.34.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.34.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.35.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.35.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.35.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.36.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.36.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.36.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.37.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.37.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.37.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.38.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.38.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.38.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.39.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.39.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.39.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.4.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.4.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.4.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.40.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.40.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.40.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.41.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.41.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.41.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.42.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.42.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.42.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.43.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.43.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.43.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.44.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.44.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.44.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.45.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.45.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.45.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.46.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.46.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.46.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.47.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.47.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.47.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.48.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.48.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.48.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.49.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.49.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.49.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.5.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.5.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.5.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.50.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.50.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.50.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.51.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.51.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.51.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.52.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.52.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.52.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.53.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.53.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.53.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.54.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.54.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.54.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.55.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.55.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.55.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.56.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.56.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.56.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.57.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.57.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.57.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.58.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.58.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.58.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.59.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.59.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.59.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.6.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.6.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.6.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.60.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.60.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.60.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.61.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.61.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.61.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.62.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.62.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.62.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.63.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.63.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.63.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.64.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.64.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.64.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.65.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.65.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.65.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.66.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.66.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.66.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.67.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.67.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.67.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.68.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.68.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.68.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.69.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.69.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.69.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.7.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.7.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.7.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.70.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.70.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.70.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.71.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.71.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.71.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.72.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.72.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.72.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.73.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.73.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.73.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.74.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.74.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.74.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.75.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.75.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.75.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.76.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.76.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.76.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.77.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.77.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.77.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.78.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.78.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.78.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.79.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.79.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.79.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.8.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.8.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.8.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.9.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.9.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.experts.9.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.gate.e_score_correction_bias": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.gate.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.shared_experts.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.shared_experts.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.mlp.shared_experts.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.k_norm.weight": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.k_proj.bias": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.q_norm.weight": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.q_proj.bias": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.v_proj.bias": "model-00005-of-00075.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.70.input_layernorm.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.0.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.0.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.0.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.1.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.1.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.1.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.10.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.10.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.10.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.11.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.11.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.11.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.12.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.12.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.12.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.13.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.13.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.13.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.14.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.14.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.14.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.15.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.15.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.15.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.16.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.16.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.16.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.17.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.17.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.17.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.18.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.18.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.18.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.19.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.19.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.19.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.2.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.2.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.2.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.20.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.20.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.20.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.21.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.21.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.21.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.22.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.22.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.22.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.23.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.23.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.23.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.24.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.24.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.24.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.25.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.25.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.25.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.26.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.26.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.26.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.27.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.27.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.27.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.28.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.28.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.28.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.29.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.29.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.29.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.3.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.3.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.3.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.30.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.30.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.30.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.31.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.31.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.31.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.32.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.32.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.32.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.33.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.33.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.33.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.34.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.34.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.34.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.35.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.35.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.35.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.36.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.36.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.36.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.37.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.37.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.37.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.38.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.38.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.38.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.39.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.39.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.39.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.4.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.4.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.4.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.40.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.40.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.40.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.41.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.41.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.41.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.42.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.42.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.42.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.43.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.43.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.43.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.44.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.44.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.44.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.45.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.45.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.45.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.46.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.46.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.46.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.47.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.47.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.47.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.48.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.48.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.48.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.49.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.49.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.49.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.5.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.5.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.5.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.50.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.50.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.50.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.51.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.51.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.51.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.52.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.52.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.52.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.53.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.53.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.53.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.54.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.54.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.54.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.55.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.55.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.55.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.56.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.56.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.56.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.57.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.57.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.57.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.58.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.58.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.58.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.59.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.59.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.59.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.6.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.6.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.6.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.60.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.60.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.60.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.61.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.61.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.61.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.62.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.62.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.62.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.63.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.63.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.63.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.64.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.64.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.64.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.65.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.65.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.65.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.66.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.66.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.66.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.67.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.67.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.67.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.68.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.68.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.68.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.69.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.69.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.69.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.7.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.7.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.7.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.70.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.70.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.70.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.71.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.71.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.71.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.72.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.72.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.72.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.73.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.73.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.73.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.74.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.74.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.74.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.75.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.75.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.75.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.76.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.76.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.76.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.77.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.77.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.77.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.78.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.78.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.78.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.79.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.79.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.79.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.experts.8.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.8.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.8.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.9.down_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.9.gate_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.experts.9.up_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.mlp.gate.e_score_correction_bias": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.gate.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.shared_experts.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.shared_experts.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.mlp.shared_experts.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00057-of-00075.safetensors", + "model.layers.70.self_attn.k_norm.weight": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.k_proj.bias": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.q_norm.weight": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.q_proj.bias": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.v_proj.bias": "model-00056-of-00075.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00056-of-00075.safetensors", + "model.layers.71.input_layernorm.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.0.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.0.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.0.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.1.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.1.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.1.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.10.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.10.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.10.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.11.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.11.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.11.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.12.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.12.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.12.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.13.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.13.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.13.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.14.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.14.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.14.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.15.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.15.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.15.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.16.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.16.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.16.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.17.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.17.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.17.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.18.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.18.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.18.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.19.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.19.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.19.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.2.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.2.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.2.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.20.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.20.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.20.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.21.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.21.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.21.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.22.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.22.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.22.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.23.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.23.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.23.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.24.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.24.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.24.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.25.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.25.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.25.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.26.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.26.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.26.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.27.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.27.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.27.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.28.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.28.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.28.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.29.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.29.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.29.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.3.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.3.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.3.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.30.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.30.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.30.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.31.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.31.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.31.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.32.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.32.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.32.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.33.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.33.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.33.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.34.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.34.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.34.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.35.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.35.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.35.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.36.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.36.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.36.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.37.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.37.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.37.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.38.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.38.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.38.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.39.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.39.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.39.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.4.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.4.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.4.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.40.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.40.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.40.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.41.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.41.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.41.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.42.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.42.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.42.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.43.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.43.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.43.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.44.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.44.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.44.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.45.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.45.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.45.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.46.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.46.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.46.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.47.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.47.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.47.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.48.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.48.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.48.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.49.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.49.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.49.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.5.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.5.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.5.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.50.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.50.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.50.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.51.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.51.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.51.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.52.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.52.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.52.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.53.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.53.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.53.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.54.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.54.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.54.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.55.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.55.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.55.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.56.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.56.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.56.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.57.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.57.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.57.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.58.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.58.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.58.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.59.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.59.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.59.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.6.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.6.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.6.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.60.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.60.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.60.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.61.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.61.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.61.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.62.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.62.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.62.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.63.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.63.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.63.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.64.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.64.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.64.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.65.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.65.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.65.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.66.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.66.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.66.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.67.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.67.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.67.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.68.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.68.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.68.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.69.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.69.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.69.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.7.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.7.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.7.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.70.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.70.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.70.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.71.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.71.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.71.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.72.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.72.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.72.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.73.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.73.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.73.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.74.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.74.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.74.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.75.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.75.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.75.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.76.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.76.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.76.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.77.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.77.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.77.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.78.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.78.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.78.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.79.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.79.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.79.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.experts.8.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.8.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.8.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.9.down_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.9.gate_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.experts.9.up_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.mlp.gate.e_score_correction_bias": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.gate.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.shared_experts.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.shared_experts.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.mlp.shared_experts.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00058-of-00075.safetensors", + "model.layers.71.self_attn.k_norm.weight": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.k_proj.bias": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.q_norm.weight": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.q_proj.bias": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.v_proj.bias": "model-00057-of-00075.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00057-of-00075.safetensors", + "model.layers.72.input_layernorm.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.0.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.0.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.0.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.1.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.1.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.1.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.10.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.10.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.10.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.11.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.11.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.11.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.12.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.12.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.12.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.13.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.13.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.13.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.14.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.14.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.14.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.15.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.15.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.15.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.16.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.16.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.16.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.17.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.17.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.17.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.18.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.18.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.18.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.19.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.19.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.19.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.2.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.2.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.2.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.20.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.20.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.20.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.21.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.21.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.21.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.22.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.22.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.22.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.23.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.23.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.23.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.24.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.24.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.24.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.25.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.25.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.25.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.26.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.26.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.26.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.27.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.27.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.27.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.28.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.28.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.28.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.29.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.29.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.29.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.3.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.3.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.3.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.30.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.30.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.30.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.31.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.31.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.31.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.32.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.32.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.32.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.33.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.33.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.33.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.34.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.34.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.34.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.35.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.35.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.35.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.36.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.36.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.36.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.37.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.37.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.37.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.38.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.38.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.38.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.39.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.39.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.39.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.4.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.4.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.4.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.40.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.40.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.40.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.41.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.41.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.41.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.42.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.42.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.42.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.43.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.43.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.43.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.44.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.44.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.44.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.45.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.45.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.45.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.46.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.46.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.46.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.47.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.47.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.47.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.48.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.48.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.48.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.49.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.49.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.49.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.5.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.5.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.5.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.50.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.50.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.50.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.51.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.51.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.51.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.52.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.52.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.52.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.53.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.53.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.53.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.54.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.54.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.54.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.55.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.55.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.55.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.56.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.56.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.56.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.57.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.57.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.57.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.58.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.58.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.58.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.59.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.59.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.59.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.6.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.6.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.6.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.60.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.60.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.60.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.61.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.61.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.61.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.62.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.62.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.62.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.63.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.63.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.63.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.64.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.64.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.64.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.65.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.65.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.65.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.66.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.66.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.66.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.67.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.67.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.67.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.68.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.68.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.68.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.69.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.69.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.69.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.7.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.7.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.7.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.70.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.70.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.70.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.71.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.71.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.71.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.72.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.72.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.72.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.73.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.73.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.73.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.74.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.74.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.74.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.75.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.75.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.75.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.76.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.76.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.76.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.77.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.77.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.77.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.78.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.78.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.78.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.79.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.79.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.79.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.experts.8.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.8.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.8.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.9.down_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.9.gate_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.experts.9.up_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.mlp.gate.e_score_correction_bias": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.gate.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.shared_experts.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.shared_experts.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.mlp.shared_experts.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00059-of-00075.safetensors", + "model.layers.72.self_attn.k_norm.weight": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.k_proj.bias": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.q_norm.weight": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.q_proj.bias": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.v_proj.bias": "model-00058-of-00075.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00058-of-00075.safetensors", + "model.layers.73.input_layernorm.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.0.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.0.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.0.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.1.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.1.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.1.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.10.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.10.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.10.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.11.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.11.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.11.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.12.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.12.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.12.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.13.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.13.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.13.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.14.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.14.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.14.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.15.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.15.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.15.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.16.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.16.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.16.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.17.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.17.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.17.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.18.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.18.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.18.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.19.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.19.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.19.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.2.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.2.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.2.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.20.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.20.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.20.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.21.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.21.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.21.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.22.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.22.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.22.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.23.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.23.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.23.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.24.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.24.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.24.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.25.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.25.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.25.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.26.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.26.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.26.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.27.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.27.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.27.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.28.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.28.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.28.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.29.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.29.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.29.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.3.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.3.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.3.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.30.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.30.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.30.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.31.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.31.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.31.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.32.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.32.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.32.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.33.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.33.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.33.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.34.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.34.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.34.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.35.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.35.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.35.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.36.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.36.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.36.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.37.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.37.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.37.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.38.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.38.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.38.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.39.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.39.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.39.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.4.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.4.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.4.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.40.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.40.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.40.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.41.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.41.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.41.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.42.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.42.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.42.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.43.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.43.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.43.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.44.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.44.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.44.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.45.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.45.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.45.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.46.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.46.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.46.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.47.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.47.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.47.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.48.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.48.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.48.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.49.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.49.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.49.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.5.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.5.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.5.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.50.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.50.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.50.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.51.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.51.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.51.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.52.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.52.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.52.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.53.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.53.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.53.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.54.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.54.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.54.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.55.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.55.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.55.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.56.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.56.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.56.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.57.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.57.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.57.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.58.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.58.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.58.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.59.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.59.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.59.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.6.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.6.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.6.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.60.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.60.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.60.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.61.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.61.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.61.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.62.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.62.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.62.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.63.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.63.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.63.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.64.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.64.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.64.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.65.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.65.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.65.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.66.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.66.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.66.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.67.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.67.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.67.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.68.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.68.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.68.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.69.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.69.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.69.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.7.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.7.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.7.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.70.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.70.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.70.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.71.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.71.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.71.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.72.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.72.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.72.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.73.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.73.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.73.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.74.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.74.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.74.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.75.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.75.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.75.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.76.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.76.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.76.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.77.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.77.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.77.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.78.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.78.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.78.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.79.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.79.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.79.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.8.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.8.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.8.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.9.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.9.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.experts.9.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.gate.e_score_correction_bias": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.gate.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.shared_experts.down_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.shared_experts.gate_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.mlp.shared_experts.up_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.k_norm.weight": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.k_proj.bias": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.q_norm.weight": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.q_proj.bias": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.v_proj.bias": "model-00059-of-00075.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.74.input_layernorm.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.0.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.0.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.0.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.1.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.1.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.1.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.10.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.10.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.10.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.11.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.11.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.11.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.12.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.12.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.12.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.13.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.13.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.13.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.14.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.14.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.14.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.15.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.15.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.15.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.16.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.16.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.16.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.17.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.17.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.17.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.18.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.18.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.18.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.19.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.19.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.19.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.2.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.2.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.2.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.20.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.20.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.20.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.21.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.21.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.21.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.22.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.22.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.22.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.23.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.23.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.23.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.24.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.24.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.24.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.25.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.25.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.25.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.26.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.26.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.26.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.27.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.27.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.27.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.28.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.28.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.28.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.29.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.29.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.29.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.3.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.3.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.3.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.30.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.30.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.30.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.31.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.31.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.31.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.32.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.32.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.32.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.33.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.33.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.33.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.34.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.34.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.34.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.35.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.35.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.35.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.36.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.36.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.36.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.37.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.37.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.37.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.38.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.38.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.38.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.39.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.39.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.39.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.4.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.4.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.4.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.40.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.40.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.40.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.41.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.41.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.41.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.42.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.42.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.42.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.43.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.43.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.43.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.44.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.44.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.44.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.45.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.45.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.45.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.46.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.46.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.46.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.47.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.47.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.47.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.48.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.48.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.48.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.49.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.49.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.49.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.5.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.5.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.5.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.50.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.50.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.50.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.51.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.51.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.51.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.52.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.52.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.52.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.53.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.53.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.53.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.54.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.54.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.54.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.55.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.55.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.55.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.56.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.56.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.56.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.57.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.57.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.57.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.58.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.58.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.58.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.59.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.59.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.59.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.6.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.6.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.6.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.60.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.60.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.60.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.61.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.61.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.61.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.62.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.62.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.62.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.63.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.63.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.63.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.64.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.64.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.64.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.65.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.65.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.65.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.66.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.66.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.66.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.67.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.67.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.67.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.68.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.68.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.68.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.69.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.69.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.69.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.7.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.7.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.7.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.70.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.70.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.70.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.71.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.71.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.71.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.72.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.72.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.72.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.73.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.73.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.73.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.74.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.74.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.74.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.75.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.75.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.75.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.76.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.76.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.76.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.77.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.77.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.77.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.78.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.78.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.78.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.79.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.79.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.79.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.8.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.8.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.8.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.9.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.9.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.experts.9.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.gate.e_score_correction_bias": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.gate.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.shared_experts.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.shared_experts.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.mlp.shared_experts.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00060-of-00075.safetensors", + "model.layers.74.self_attn.k_norm.weight": "model-00060-of-00075.safetensors", + "model.layers.74.self_attn.k_proj.bias": "model-00059-of-00075.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.74.self_attn.q_norm.weight": "model-00060-of-00075.safetensors", + "model.layers.74.self_attn.q_proj.bias": "model-00059-of-00075.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.74.self_attn.v_proj.bias": "model-00059-of-00075.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00059-of-00075.safetensors", + "model.layers.75.input_layernorm.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.0.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.0.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.0.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.1.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.1.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.1.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.10.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.10.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.10.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.11.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.11.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.11.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.12.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.12.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.12.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.13.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.13.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.13.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.14.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.14.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.14.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.15.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.15.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.15.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.16.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.16.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.16.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.17.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.17.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.17.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.18.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.18.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.18.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.19.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.19.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.19.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.2.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.2.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.2.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.20.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.20.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.20.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.21.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.21.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.21.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.22.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.22.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.22.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.23.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.23.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.23.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.24.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.24.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.24.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.25.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.25.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.25.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.26.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.26.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.26.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.27.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.27.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.27.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.28.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.28.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.28.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.29.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.29.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.29.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.3.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.3.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.3.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.30.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.30.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.30.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.31.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.31.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.31.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.32.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.32.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.32.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.33.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.33.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.33.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.34.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.34.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.34.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.35.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.35.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.35.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.36.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.36.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.36.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.37.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.37.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.37.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.38.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.38.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.38.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.39.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.39.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.39.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.4.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.4.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.4.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.40.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.40.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.40.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.41.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.41.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.41.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.42.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.42.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.42.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.43.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.43.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.43.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.44.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.44.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.44.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.45.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.45.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.45.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.46.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.46.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.46.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.47.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.47.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.47.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.48.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.48.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.48.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.49.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.49.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.49.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.5.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.5.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.5.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.50.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.50.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.50.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.51.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.51.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.51.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.52.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.52.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.52.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.53.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.53.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.53.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.54.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.54.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.54.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.55.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.55.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.55.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.56.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.56.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.56.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.57.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.57.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.57.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.58.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.58.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.58.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.59.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.59.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.59.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.6.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.6.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.6.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.60.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.60.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.60.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.61.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.61.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.61.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.62.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.62.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.62.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.63.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.63.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.63.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.64.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.64.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.64.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.65.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.65.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.65.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.66.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.66.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.66.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.67.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.67.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.67.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.68.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.68.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.68.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.69.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.69.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.69.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.7.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.7.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.7.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.70.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.70.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.70.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.71.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.71.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.71.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.72.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.72.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.72.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.73.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.73.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.73.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.74.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.74.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.74.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.75.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.75.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.75.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.76.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.76.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.76.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.77.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.77.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.77.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.78.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.78.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.78.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.79.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.79.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.79.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.experts.8.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.8.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.8.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.9.down_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.9.gate_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.experts.9.up_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.mlp.gate.e_score_correction_bias": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.gate.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.shared_experts.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.shared_experts.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.mlp.shared_experts.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00061-of-00075.safetensors", + "model.layers.75.self_attn.k_norm.weight": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.k_proj.bias": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.q_norm.weight": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.q_proj.bias": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.v_proj.bias": "model-00060-of-00075.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00060-of-00075.safetensors", + "model.layers.76.input_layernorm.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.0.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.0.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.0.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.1.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.1.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.1.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.10.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.10.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.10.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.11.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.11.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.11.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.12.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.12.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.12.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.13.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.13.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.13.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.14.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.14.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.14.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.15.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.15.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.15.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.16.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.16.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.16.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.17.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.17.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.17.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.18.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.18.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.18.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.19.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.19.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.19.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.2.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.2.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.2.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.20.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.20.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.20.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.21.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.21.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.21.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.22.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.22.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.22.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.23.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.23.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.23.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.24.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.24.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.24.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.25.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.25.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.25.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.26.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.26.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.26.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.27.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.27.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.27.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.28.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.28.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.28.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.29.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.29.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.29.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.3.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.3.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.3.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.30.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.30.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.30.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.31.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.31.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.31.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.32.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.32.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.32.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.33.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.33.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.33.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.34.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.34.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.34.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.35.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.35.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.35.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.36.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.36.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.36.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.37.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.37.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.37.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.38.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.38.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.38.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.39.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.39.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.39.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.4.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.4.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.4.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.40.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.40.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.40.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.41.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.41.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.41.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.42.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.42.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.42.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.43.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.43.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.43.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.44.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.44.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.44.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.45.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.45.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.45.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.46.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.46.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.46.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.47.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.47.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.47.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.48.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.48.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.48.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.49.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.49.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.49.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.5.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.5.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.5.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.50.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.50.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.50.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.51.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.51.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.51.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.52.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.52.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.52.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.53.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.53.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.53.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.54.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.54.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.54.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.55.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.55.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.55.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.56.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.56.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.56.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.57.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.57.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.57.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.58.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.58.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.58.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.59.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.59.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.59.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.6.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.6.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.6.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.60.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.60.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.60.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.61.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.61.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.61.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.62.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.62.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.62.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.63.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.63.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.63.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.64.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.64.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.64.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.65.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.65.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.65.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.66.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.66.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.66.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.67.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.67.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.67.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.68.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.68.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.68.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.69.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.69.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.69.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.7.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.7.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.7.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.70.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.70.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.70.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.71.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.71.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.71.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.72.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.72.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.72.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.73.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.73.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.73.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.74.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.74.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.74.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.75.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.75.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.75.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.76.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.76.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.76.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.77.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.77.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.77.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.78.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.78.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.78.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.79.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.79.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.79.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.experts.8.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.8.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.8.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.9.down_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.9.gate_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.experts.9.up_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.mlp.gate.e_score_correction_bias": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.gate.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.shared_experts.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.shared_experts.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.mlp.shared_experts.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00062-of-00075.safetensors", + "model.layers.76.self_attn.k_norm.weight": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.k_proj.bias": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.o_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.q_norm.weight": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.q_proj.bias": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.v_proj.bias": "model-00061-of-00075.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00061-of-00075.safetensors", + "model.layers.77.input_layernorm.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.0.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.0.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.0.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.1.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.1.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.1.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.10.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.10.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.10.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.11.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.11.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.11.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.12.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.12.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.12.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.13.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.13.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.13.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.14.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.14.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.14.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.15.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.15.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.15.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.16.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.16.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.16.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.17.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.17.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.17.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.18.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.18.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.18.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.19.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.19.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.19.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.2.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.2.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.2.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.20.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.20.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.20.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.21.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.21.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.21.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.22.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.22.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.22.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.23.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.23.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.23.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.24.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.24.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.24.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.25.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.25.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.25.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.26.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.26.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.26.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.27.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.27.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.27.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.28.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.28.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.28.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.29.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.29.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.29.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.3.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.3.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.3.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.30.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.30.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.30.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.31.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.31.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.31.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.32.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.32.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.32.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.33.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.33.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.33.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.34.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.34.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.34.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.35.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.35.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.35.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.36.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.36.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.36.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.37.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.37.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.37.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.38.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.38.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.38.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.39.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.39.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.39.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.4.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.4.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.4.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.40.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.40.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.40.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.41.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.41.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.41.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.42.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.42.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.42.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.43.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.43.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.43.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.44.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.44.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.44.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.45.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.45.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.45.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.46.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.46.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.46.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.47.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.47.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.47.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.48.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.48.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.48.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.49.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.49.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.49.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.5.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.5.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.5.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.50.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.50.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.50.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.51.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.51.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.51.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.52.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.52.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.52.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.53.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.53.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.53.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.54.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.54.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.54.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.55.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.55.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.55.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.56.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.56.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.56.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.57.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.57.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.57.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.58.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.58.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.58.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.59.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.59.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.59.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.6.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.6.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.6.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.60.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.60.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.60.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.61.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.61.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.61.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.62.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.62.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.62.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.63.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.63.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.63.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.64.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.64.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.64.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.65.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.65.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.65.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.66.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.66.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.66.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.67.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.67.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.67.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.68.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.68.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.68.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.69.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.69.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.69.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.7.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.7.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.7.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.70.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.70.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.70.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.71.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.71.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.71.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.72.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.72.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.72.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.73.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.73.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.73.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.74.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.74.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.74.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.75.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.75.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.75.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.76.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.76.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.76.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.77.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.77.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.77.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.78.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.78.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.78.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.79.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.79.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.79.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.experts.8.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.8.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.8.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.9.down_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.9.gate_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.experts.9.up_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.mlp.gate.e_score_correction_bias": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.gate.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.shared_experts.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.shared_experts.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.mlp.shared_experts.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00063-of-00075.safetensors", + "model.layers.77.self_attn.k_norm.weight": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.k_proj.bias": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.q_norm.weight": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.q_proj.bias": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.v_proj.bias": "model-00062-of-00075.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00062-of-00075.safetensors", + "model.layers.78.input_layernorm.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.0.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.0.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.0.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.1.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.1.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.1.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.10.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.10.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.10.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.11.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.11.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.11.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.12.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.12.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.12.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.13.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.13.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.13.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.14.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.14.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.14.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.15.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.15.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.15.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.16.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.16.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.16.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.17.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.17.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.17.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.18.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.18.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.18.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.19.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.19.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.19.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.2.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.2.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.2.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.20.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.20.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.20.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.21.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.21.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.21.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.22.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.22.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.22.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.23.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.23.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.23.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.24.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.24.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.24.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.25.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.25.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.25.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.26.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.26.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.26.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.27.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.27.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.27.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.28.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.28.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.28.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.29.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.29.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.29.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.3.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.3.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.3.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.30.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.30.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.30.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.31.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.31.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.31.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.32.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.32.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.32.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.33.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.33.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.33.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.34.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.34.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.34.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.35.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.35.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.35.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.36.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.36.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.36.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.37.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.37.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.37.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.38.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.38.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.38.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.39.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.39.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.39.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.4.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.4.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.4.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.40.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.40.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.40.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.41.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.41.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.41.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.42.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.42.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.42.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.43.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.43.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.43.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.44.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.44.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.44.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.45.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.45.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.45.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.46.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.46.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.46.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.47.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.47.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.47.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.48.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.48.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.48.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.49.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.49.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.49.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.5.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.5.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.5.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.50.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.50.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.50.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.51.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.51.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.51.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.52.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.52.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.52.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.53.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.53.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.53.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.54.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.54.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.54.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.55.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.55.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.55.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.56.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.56.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.56.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.57.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.57.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.57.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.58.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.58.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.58.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.59.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.59.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.59.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.6.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.6.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.6.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.60.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.60.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.60.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.61.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.61.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.61.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.62.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.62.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.62.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.63.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.63.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.63.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.64.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.64.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.64.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.65.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.65.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.65.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.66.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.66.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.66.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.67.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.67.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.67.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.68.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.68.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.68.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.69.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.69.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.69.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.7.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.7.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.7.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.70.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.70.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.70.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.71.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.71.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.71.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.72.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.72.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.72.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.73.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.73.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.73.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.74.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.74.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.74.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.75.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.75.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.75.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.76.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.76.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.76.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.77.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.77.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.77.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.78.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.78.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.78.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.79.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.79.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.79.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.experts.8.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.8.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.8.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.9.down_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.9.gate_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.experts.9.up_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.mlp.gate.e_score_correction_bias": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.gate.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.shared_experts.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.shared_experts.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.mlp.shared_experts.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00064-of-00075.safetensors", + "model.layers.78.self_attn.k_norm.weight": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.k_proj.bias": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.q_norm.weight": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.q_proj.bias": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.v_proj.bias": "model-00063-of-00075.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00063-of-00075.safetensors", + "model.layers.79.input_layernorm.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.0.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.0.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.0.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.1.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.1.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.1.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.10.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.10.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.10.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.11.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.11.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.11.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.12.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.12.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.12.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.13.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.13.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.13.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.14.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.14.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.14.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.15.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.15.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.15.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.16.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.16.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.16.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.17.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.17.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.17.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.18.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.18.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.18.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.19.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.19.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.19.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.2.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.2.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.2.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.20.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.20.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.20.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.21.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.21.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.21.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.22.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.22.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.22.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.23.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.23.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.23.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.24.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.24.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.24.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.25.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.25.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.25.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.26.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.26.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.26.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.27.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.27.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.27.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.28.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.28.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.28.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.29.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.29.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.29.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.3.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.3.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.3.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.30.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.30.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.30.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.31.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.31.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.31.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.32.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.32.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.32.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.33.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.33.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.33.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.34.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.34.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.34.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.35.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.35.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.35.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.36.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.36.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.36.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.37.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.37.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.37.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.38.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.38.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.38.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.39.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.39.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.39.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.4.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.4.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.4.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.40.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.40.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.40.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.41.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.41.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.41.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.42.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.42.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.42.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.43.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.43.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.43.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.44.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.44.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.44.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.45.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.45.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.45.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.46.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.46.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.46.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.47.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.47.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.47.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.48.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.48.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.48.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.49.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.49.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.49.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.5.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.5.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.5.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.50.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.50.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.50.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.51.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.51.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.51.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.52.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.52.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.52.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.53.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.53.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.53.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.54.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.54.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.54.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.55.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.55.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.55.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.56.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.56.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.56.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.57.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.57.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.57.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.58.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.58.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.58.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.59.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.59.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.59.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.6.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.6.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.6.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.60.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.60.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.60.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.61.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.61.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.61.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.62.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.62.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.62.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.63.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.63.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.63.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.64.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.64.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.64.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.65.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.65.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.65.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.66.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.66.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.66.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.67.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.67.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.67.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.68.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.68.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.68.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.69.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.69.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.69.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.7.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.7.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.7.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.70.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.70.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.70.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.71.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.71.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.71.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.72.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.72.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.72.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.73.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.73.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.73.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.74.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.74.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.74.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.75.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.75.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.75.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.76.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.76.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.76.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.77.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.77.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.77.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.78.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.78.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.78.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.79.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.79.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.79.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.8.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.8.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.8.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.9.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.9.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.experts.9.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.gate.e_score_correction_bias": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.gate.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.shared_experts.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.shared_experts.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.mlp.shared_experts.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.k_norm.weight": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.k_proj.bias": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.q_norm.weight": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.q_proj.bias": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.v_proj.bias": "model-00064-of-00075.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.8.input_layernorm.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.0.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.0.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.0.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.1.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.1.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.1.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.10.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.10.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.10.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.11.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.11.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.11.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.12.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.12.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.12.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.13.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.13.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.13.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.14.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.14.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.14.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.15.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.15.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.15.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.16.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.16.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.16.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.17.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.17.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.17.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.18.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.18.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.18.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.19.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.19.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.19.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.2.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.2.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.2.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.20.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.20.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.20.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.21.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.21.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.21.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.22.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.22.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.22.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.23.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.23.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.23.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.24.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.24.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.24.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.25.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.25.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.25.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.26.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.26.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.26.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.27.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.27.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.27.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.28.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.28.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.28.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.29.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.29.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.29.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.3.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.3.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.3.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.30.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.30.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.30.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.31.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.31.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.31.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.32.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.32.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.32.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.33.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.33.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.33.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.34.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.34.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.34.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.35.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.35.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.35.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.36.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.36.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.36.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.37.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.37.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.37.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.38.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.38.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.38.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.39.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.39.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.39.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.4.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.4.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.4.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.40.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.40.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.40.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.41.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.41.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.41.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.42.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.42.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.42.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.43.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.43.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.43.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.44.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.44.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.44.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.45.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.45.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.45.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.46.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.46.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.46.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.47.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.47.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.47.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.48.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.48.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.48.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.49.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.49.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.49.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.5.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.5.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.5.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.50.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.50.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.50.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.51.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.51.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.51.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.52.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.52.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.52.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.53.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.53.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.53.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.54.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.54.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.54.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.55.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.55.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.55.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.56.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.56.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.56.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.57.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.57.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.57.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.58.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.58.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.58.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.59.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.59.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.59.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.6.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.6.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.6.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.60.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.60.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.60.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.61.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.61.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.61.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.62.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.62.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.62.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.63.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.63.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.63.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.64.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.64.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.64.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.65.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.65.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.65.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.66.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.66.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.66.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.67.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.67.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.67.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.68.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.68.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.68.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.69.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.69.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.69.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.7.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.7.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.7.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.70.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.70.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.70.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.71.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.71.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.71.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.72.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.72.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.72.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.73.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.73.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.73.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.74.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.74.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.74.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.75.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.75.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.75.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.76.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.76.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.76.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.77.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.77.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.77.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.78.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.78.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.78.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.79.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.79.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.79.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.experts.8.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.8.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.8.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.9.down_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.9.gate_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.experts.9.up_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.mlp.gate.e_score_correction_bias": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.gate.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.shared_experts.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.shared_experts.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.mlp.shared_experts.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00006-of-00075.safetensors", + "model.layers.8.self_attn.k_norm.weight": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.k_proj.bias": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.q_norm.weight": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.q_proj.bias": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.v_proj.bias": "model-00005-of-00075.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00005-of-00075.safetensors", + "model.layers.80.input_layernorm.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.0.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.0.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.0.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.1.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.1.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.1.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.10.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.10.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.10.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.11.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.11.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.11.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.12.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.12.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.12.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.13.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.13.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.13.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.14.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.14.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.14.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.15.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.15.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.15.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.16.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.16.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.16.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.17.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.17.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.17.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.18.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.18.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.18.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.19.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.19.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.19.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.2.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.2.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.2.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.20.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.20.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.20.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.21.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.21.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.21.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.22.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.22.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.22.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.23.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.23.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.23.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.24.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.24.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.24.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.25.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.25.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.25.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.26.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.26.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.26.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.27.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.27.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.27.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.28.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.28.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.28.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.29.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.29.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.29.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.3.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.3.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.3.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.30.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.30.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.30.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.31.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.31.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.31.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.32.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.32.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.32.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.33.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.33.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.33.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.34.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.34.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.34.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.35.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.35.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.35.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.36.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.36.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.36.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.37.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.37.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.37.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.38.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.38.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.38.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.39.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.39.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.39.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.4.down_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.4.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.4.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.40.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.40.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.40.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.41.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.41.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.41.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.42.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.42.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.42.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.43.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.43.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.43.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.44.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.44.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.44.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.45.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.45.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.45.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.46.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.46.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.46.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.47.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.47.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.47.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.48.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.48.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.48.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.49.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.49.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.49.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.5.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.5.gate_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.5.up_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.mlp.experts.50.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.50.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.50.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.51.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.51.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.51.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.52.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.52.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.52.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.53.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.53.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.53.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.54.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.54.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.54.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.55.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.55.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.55.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.56.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.56.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.56.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.57.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.57.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.57.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.58.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.58.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.58.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.59.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.59.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.59.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.6.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.6.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.6.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.60.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.60.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.60.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.61.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.61.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.61.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.62.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.62.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.62.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.63.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.63.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.63.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.64.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.64.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.64.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.65.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.65.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.65.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.66.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.66.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.66.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.67.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.67.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.67.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.68.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.68.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.68.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.69.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.69.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.69.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.7.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.7.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.7.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.70.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.70.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.70.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.71.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.71.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.71.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.72.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.72.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.72.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.73.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.73.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.73.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.74.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.74.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.74.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.75.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.75.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.75.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.76.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.76.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.76.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.77.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.77.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.77.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.78.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.78.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.78.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.79.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.79.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.79.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.8.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.8.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.8.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.9.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.9.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.experts.9.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.gate.e_score_correction_bias": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.gate.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.shared_experts.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.shared_experts.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.mlp.shared_experts.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.80.post_attention_layernorm.weight": "model-00065-of-00075.safetensors", + "model.layers.80.self_attn.k_norm.weight": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.k_proj.bias": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.k_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.o_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.q_norm.weight": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.q_proj.bias": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.q_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.v_proj.bias": "model-00064-of-00075.safetensors", + "model.layers.80.self_attn.v_proj.weight": "model-00064-of-00075.safetensors", + "model.layers.81.input_layernorm.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.0.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.0.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.0.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.1.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.1.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.1.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.10.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.10.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.10.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.11.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.11.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.11.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.12.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.12.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.12.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.13.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.13.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.13.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.14.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.14.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.14.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.15.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.15.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.15.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.16.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.16.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.16.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.17.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.17.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.17.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.18.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.18.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.18.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.19.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.19.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.19.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.2.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.2.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.2.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.20.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.20.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.20.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.21.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.21.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.21.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.22.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.22.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.22.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.23.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.23.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.23.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.24.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.24.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.24.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.25.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.25.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.25.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.26.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.26.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.26.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.27.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.27.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.27.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.28.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.28.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.28.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.29.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.29.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.29.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.3.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.3.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.3.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.30.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.30.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.30.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.31.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.31.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.31.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.32.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.32.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.32.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.33.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.33.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.33.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.34.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.34.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.34.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.35.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.35.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.35.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.36.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.36.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.36.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.37.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.37.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.37.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.38.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.38.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.38.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.39.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.39.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.39.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.4.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.4.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.4.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.40.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.40.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.40.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.41.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.41.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.41.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.42.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.42.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.42.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.43.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.43.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.43.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.44.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.44.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.44.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.45.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.45.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.45.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.46.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.46.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.46.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.47.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.47.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.47.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.48.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.48.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.48.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.49.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.49.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.49.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.5.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.5.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.5.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.50.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.50.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.50.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.51.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.51.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.51.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.52.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.52.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.52.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.53.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.53.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.53.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.54.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.54.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.54.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.55.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.55.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.55.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.56.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.56.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.56.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.57.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.57.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.57.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.58.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.58.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.58.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.59.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.59.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.59.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.6.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.6.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.6.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.60.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.60.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.60.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.61.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.61.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.61.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.62.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.62.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.62.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.63.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.63.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.63.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.64.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.64.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.64.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.65.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.65.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.65.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.66.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.66.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.66.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.67.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.67.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.67.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.68.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.68.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.68.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.69.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.69.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.69.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.7.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.7.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.7.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.70.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.70.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.70.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.71.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.71.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.71.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.72.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.72.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.72.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.73.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.73.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.73.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.74.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.74.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.74.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.75.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.75.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.75.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.76.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.76.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.76.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.77.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.77.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.77.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.78.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.78.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.78.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.79.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.79.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.79.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.experts.8.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.8.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.8.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.9.down_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.9.gate_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.experts.9.up_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.mlp.gate.e_score_correction_bias": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.gate.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.shared_experts.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.shared_experts.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.mlp.shared_experts.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.81.post_attention_layernorm.weight": "model-00066-of-00075.safetensors", + "model.layers.81.self_attn.k_norm.weight": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.k_proj.bias": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.k_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.o_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.q_norm.weight": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.q_proj.bias": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.q_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.v_proj.bias": "model-00065-of-00075.safetensors", + "model.layers.81.self_attn.v_proj.weight": "model-00065-of-00075.safetensors", + "model.layers.82.input_layernorm.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.0.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.0.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.0.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.1.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.1.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.1.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.10.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.10.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.10.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.11.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.11.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.11.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.12.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.12.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.12.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.13.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.13.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.13.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.14.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.14.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.14.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.15.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.15.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.15.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.16.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.16.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.16.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.17.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.17.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.17.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.18.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.18.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.18.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.19.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.19.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.19.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.2.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.2.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.2.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.20.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.20.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.20.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.21.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.21.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.21.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.22.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.22.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.22.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.23.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.23.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.23.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.24.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.24.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.24.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.25.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.25.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.25.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.26.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.26.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.26.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.27.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.27.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.27.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.28.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.28.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.28.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.29.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.29.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.29.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.3.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.3.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.3.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.30.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.30.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.30.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.31.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.31.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.31.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.32.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.32.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.32.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.33.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.33.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.33.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.34.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.34.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.34.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.35.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.35.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.35.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.36.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.36.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.36.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.37.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.37.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.37.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.38.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.38.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.38.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.39.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.39.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.39.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.4.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.4.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.4.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.40.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.40.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.40.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.41.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.41.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.41.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.42.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.42.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.42.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.43.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.43.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.43.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.44.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.44.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.44.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.45.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.45.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.45.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.46.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.46.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.46.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.47.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.47.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.47.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.48.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.48.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.48.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.49.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.49.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.49.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.5.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.5.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.5.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.50.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.50.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.50.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.51.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.51.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.51.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.52.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.52.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.52.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.53.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.53.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.53.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.54.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.54.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.54.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.55.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.55.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.55.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.56.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.56.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.56.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.57.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.57.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.57.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.58.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.58.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.58.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.59.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.59.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.59.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.6.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.6.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.6.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.60.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.60.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.60.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.61.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.61.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.61.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.62.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.62.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.62.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.63.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.63.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.63.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.64.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.64.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.64.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.65.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.65.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.65.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.66.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.66.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.66.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.67.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.67.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.67.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.68.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.68.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.68.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.69.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.69.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.69.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.7.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.7.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.7.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.70.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.70.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.70.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.71.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.71.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.71.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.72.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.72.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.72.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.73.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.73.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.73.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.74.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.74.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.74.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.75.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.75.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.75.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.76.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.76.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.76.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.77.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.77.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.77.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.78.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.78.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.78.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.79.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.79.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.79.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.experts.8.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.8.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.8.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.9.down_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.9.gate_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.experts.9.up_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.mlp.gate.e_score_correction_bias": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.gate.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.shared_experts.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.shared_experts.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.mlp.shared_experts.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.82.post_attention_layernorm.weight": "model-00067-of-00075.safetensors", + "model.layers.82.self_attn.k_norm.weight": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.k_proj.bias": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.k_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.o_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.q_norm.weight": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.q_proj.bias": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.q_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.v_proj.bias": "model-00066-of-00075.safetensors", + "model.layers.82.self_attn.v_proj.weight": "model-00066-of-00075.safetensors", + "model.layers.83.input_layernorm.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.0.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.0.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.0.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.1.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.1.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.1.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.10.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.10.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.10.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.11.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.11.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.11.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.12.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.12.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.12.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.13.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.13.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.13.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.14.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.14.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.14.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.15.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.15.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.15.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.16.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.16.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.16.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.17.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.17.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.17.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.18.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.18.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.18.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.19.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.19.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.19.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.2.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.2.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.2.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.20.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.20.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.20.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.21.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.21.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.21.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.22.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.22.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.22.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.23.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.23.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.23.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.24.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.24.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.24.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.25.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.25.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.25.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.26.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.26.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.26.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.27.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.27.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.27.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.28.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.28.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.28.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.29.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.29.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.29.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.3.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.3.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.3.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.30.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.30.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.30.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.31.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.31.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.31.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.32.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.32.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.32.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.33.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.33.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.33.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.34.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.34.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.34.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.35.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.35.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.35.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.36.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.36.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.36.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.37.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.37.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.37.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.38.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.38.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.38.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.39.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.39.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.39.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.4.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.4.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.4.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.40.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.40.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.40.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.41.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.41.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.41.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.42.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.42.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.42.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.43.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.43.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.43.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.44.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.44.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.44.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.45.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.45.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.45.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.46.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.46.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.46.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.47.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.47.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.47.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.48.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.48.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.48.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.49.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.49.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.49.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.5.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.5.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.5.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.50.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.50.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.50.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.51.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.51.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.51.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.52.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.52.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.52.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.53.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.53.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.53.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.54.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.54.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.54.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.55.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.55.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.55.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.56.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.56.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.56.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.57.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.57.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.57.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.58.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.58.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.58.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.59.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.59.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.59.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.6.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.6.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.6.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.60.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.60.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.60.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.61.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.61.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.61.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.62.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.62.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.62.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.63.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.63.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.63.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.64.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.64.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.64.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.65.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.65.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.65.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.66.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.66.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.66.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.67.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.67.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.67.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.68.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.68.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.68.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.69.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.69.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.69.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.7.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.7.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.7.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.70.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.70.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.70.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.71.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.71.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.71.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.72.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.72.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.72.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.73.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.73.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.73.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.74.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.74.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.74.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.75.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.75.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.75.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.76.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.76.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.76.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.77.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.77.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.77.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.78.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.78.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.78.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.79.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.79.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.79.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.experts.8.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.8.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.8.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.9.down_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.9.gate_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.experts.9.up_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.mlp.gate.e_score_correction_bias": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.gate.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.shared_experts.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.shared_experts.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.mlp.shared_experts.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.83.post_attention_layernorm.weight": "model-00068-of-00075.safetensors", + "model.layers.83.self_attn.k_norm.weight": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.k_proj.bias": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.k_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.o_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.q_norm.weight": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.q_proj.bias": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.q_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.v_proj.bias": "model-00067-of-00075.safetensors", + "model.layers.83.self_attn.v_proj.weight": "model-00067-of-00075.safetensors", + "model.layers.84.input_layernorm.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.0.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.0.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.0.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.1.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.1.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.1.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.10.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.10.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.10.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.11.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.11.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.11.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.12.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.12.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.12.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.13.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.13.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.13.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.14.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.14.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.14.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.15.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.15.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.15.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.16.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.16.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.16.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.17.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.17.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.17.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.18.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.18.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.18.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.19.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.19.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.19.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.2.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.2.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.2.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.20.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.20.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.20.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.21.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.21.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.21.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.22.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.22.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.22.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.23.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.23.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.23.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.24.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.24.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.24.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.25.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.25.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.25.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.26.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.26.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.26.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.27.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.27.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.27.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.28.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.28.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.28.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.29.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.29.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.29.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.3.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.3.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.3.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.30.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.30.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.30.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.31.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.31.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.31.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.32.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.32.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.32.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.33.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.33.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.33.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.34.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.34.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.34.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.35.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.35.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.35.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.36.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.36.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.36.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.37.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.37.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.37.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.38.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.38.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.38.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.39.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.39.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.39.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.4.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.4.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.4.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.40.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.40.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.40.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.41.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.41.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.41.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.42.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.42.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.42.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.43.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.43.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.43.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.44.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.44.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.44.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.45.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.45.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.45.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.46.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.46.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.46.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.47.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.47.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.47.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.48.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.48.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.48.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.49.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.49.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.49.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.5.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.5.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.5.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.50.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.50.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.50.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.51.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.51.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.51.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.52.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.52.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.52.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.53.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.53.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.53.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.54.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.54.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.54.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.55.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.55.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.55.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.56.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.56.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.56.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.57.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.57.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.57.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.58.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.58.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.58.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.59.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.59.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.59.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.6.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.6.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.6.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.60.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.60.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.60.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.61.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.61.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.61.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.62.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.62.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.62.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.63.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.63.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.63.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.64.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.64.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.64.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.65.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.65.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.65.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.66.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.66.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.66.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.67.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.67.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.67.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.68.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.68.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.68.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.69.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.69.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.69.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.7.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.7.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.7.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.70.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.70.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.70.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.71.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.71.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.71.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.72.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.72.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.72.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.73.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.73.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.73.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.74.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.74.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.74.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.75.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.75.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.75.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.76.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.76.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.76.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.77.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.77.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.77.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.78.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.78.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.78.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.79.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.79.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.79.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.8.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.8.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.8.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.9.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.9.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.experts.9.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.gate.e_score_correction_bias": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.gate.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.shared_experts.down_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.shared_experts.gate_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.mlp.shared_experts.up_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.post_attention_layernorm.weight": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.k_norm.weight": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.k_proj.bias": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.k_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.o_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.q_norm.weight": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.q_proj.bias": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.q_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.v_proj.bias": "model-00068-of-00075.safetensors", + "model.layers.84.self_attn.v_proj.weight": "model-00068-of-00075.safetensors", + "model.layers.85.input_layernorm.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.0.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.0.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.0.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.1.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.1.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.1.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.10.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.10.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.10.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.11.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.11.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.11.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.12.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.12.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.12.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.13.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.13.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.13.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.14.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.14.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.14.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.15.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.15.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.15.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.16.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.16.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.16.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.17.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.17.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.17.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.18.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.18.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.18.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.19.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.19.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.19.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.2.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.2.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.2.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.20.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.20.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.20.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.21.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.21.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.21.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.22.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.22.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.22.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.23.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.23.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.23.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.24.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.24.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.24.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.25.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.25.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.25.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.26.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.26.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.26.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.27.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.27.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.27.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.28.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.28.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.28.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.29.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.29.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.29.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.3.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.3.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.3.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.30.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.30.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.30.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.31.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.31.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.31.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.32.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.32.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.32.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.33.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.33.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.33.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.34.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.34.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.34.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.35.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.35.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.35.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.36.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.36.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.36.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.37.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.37.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.37.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.38.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.38.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.38.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.39.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.39.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.39.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.4.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.4.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.4.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.40.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.40.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.40.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.41.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.41.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.41.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.42.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.42.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.42.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.43.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.43.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.43.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.44.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.44.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.44.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.45.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.45.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.45.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.46.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.46.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.46.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.47.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.47.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.47.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.48.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.48.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.48.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.49.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.49.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.49.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.5.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.5.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.5.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.50.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.50.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.50.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.51.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.51.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.51.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.52.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.52.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.52.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.53.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.53.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.53.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.54.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.54.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.54.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.55.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.55.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.55.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.56.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.56.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.56.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.57.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.57.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.57.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.58.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.58.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.58.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.59.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.59.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.59.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.6.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.6.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.6.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.60.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.60.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.60.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.61.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.61.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.61.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.62.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.62.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.62.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.63.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.63.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.63.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.64.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.64.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.64.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.65.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.65.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.65.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.66.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.66.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.66.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.67.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.67.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.67.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.68.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.68.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.68.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.69.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.69.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.69.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.7.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.7.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.7.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.70.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.70.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.70.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.71.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.71.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.71.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.72.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.72.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.72.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.73.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.73.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.73.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.74.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.74.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.74.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.75.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.75.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.75.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.76.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.76.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.76.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.77.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.77.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.77.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.78.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.78.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.78.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.79.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.79.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.79.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.8.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.8.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.8.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.9.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.9.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.experts.9.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.gate.e_score_correction_bias": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.gate.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.shared_experts.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.shared_experts.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.mlp.shared_experts.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.post_attention_layernorm.weight": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.k_norm.weight": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.k_proj.bias": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.k_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.o_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.q_norm.weight": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.q_proj.bias": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.q_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.v_proj.bias": "model-00069-of-00075.safetensors", + "model.layers.85.self_attn.v_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.input_layernorm.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.0.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.0.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.0.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.1.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.1.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.1.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.10.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.10.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.10.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.11.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.11.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.11.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.12.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.12.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.12.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.13.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.13.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.13.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.14.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.14.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.14.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.15.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.15.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.15.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.16.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.16.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.16.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.17.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.17.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.17.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.18.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.18.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.18.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.19.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.19.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.19.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.2.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.2.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.2.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.20.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.20.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.20.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.21.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.21.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.21.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.22.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.22.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.22.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.23.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.23.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.23.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.24.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.24.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.24.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.25.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.25.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.25.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.26.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.26.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.26.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.27.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.27.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.27.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.28.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.28.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.28.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.29.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.29.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.29.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.3.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.3.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.3.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.30.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.30.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.30.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.31.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.31.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.31.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.32.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.32.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.32.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.33.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.33.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.33.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.34.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.34.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.34.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.35.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.35.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.35.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.36.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.36.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.36.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.37.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.37.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.37.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.38.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.38.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.38.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.39.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.39.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.39.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.4.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.4.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.4.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.40.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.40.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.40.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.41.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.41.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.41.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.42.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.42.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.42.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.43.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.43.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.43.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.44.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.44.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.44.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.45.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.45.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.45.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.46.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.46.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.46.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.47.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.47.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.47.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.48.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.48.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.48.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.49.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.49.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.49.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.5.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.5.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.5.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.50.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.50.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.50.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.51.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.51.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.51.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.52.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.52.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.52.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.53.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.53.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.53.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.54.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.54.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.54.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.55.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.55.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.55.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.56.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.56.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.56.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.57.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.57.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.57.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.58.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.58.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.58.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.59.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.59.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.59.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.6.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.6.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.6.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.60.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.60.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.60.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.61.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.61.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.61.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.62.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.62.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.62.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.63.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.63.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.63.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.64.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.64.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.64.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.65.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.65.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.65.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.66.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.66.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.66.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.67.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.67.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.67.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.68.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.68.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.68.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.69.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.69.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.69.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.7.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.7.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.7.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.70.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.70.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.70.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.71.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.71.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.71.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.72.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.72.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.72.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.73.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.73.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.73.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.74.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.74.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.74.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.75.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.75.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.75.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.76.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.76.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.76.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.77.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.77.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.77.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.78.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.78.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.78.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.79.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.79.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.79.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.experts.8.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.8.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.8.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.9.down_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.9.gate_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.experts.9.up_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.mlp.gate.e_score_correction_bias": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.gate.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.shared_experts.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.shared_experts.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.mlp.shared_experts.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.86.post_attention_layernorm.weight": "model-00070-of-00075.safetensors", + "model.layers.86.self_attn.k_norm.weight": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.k_proj.bias": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.k_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.o_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.q_norm.weight": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.q_proj.bias": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.q_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.v_proj.bias": "model-00069-of-00075.safetensors", + "model.layers.86.self_attn.v_proj.weight": "model-00069-of-00075.safetensors", + "model.layers.87.input_layernorm.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.0.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.0.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.0.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.1.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.1.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.1.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.10.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.10.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.10.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.11.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.11.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.11.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.12.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.12.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.12.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.13.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.13.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.13.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.14.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.14.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.14.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.15.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.15.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.15.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.16.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.16.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.16.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.17.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.17.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.17.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.18.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.18.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.18.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.19.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.19.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.19.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.2.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.2.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.2.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.20.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.20.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.20.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.21.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.21.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.21.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.22.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.22.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.22.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.23.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.23.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.23.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.24.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.24.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.24.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.25.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.25.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.25.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.26.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.26.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.26.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.27.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.27.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.27.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.28.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.28.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.28.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.29.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.29.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.29.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.3.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.3.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.3.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.30.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.30.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.30.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.31.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.31.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.31.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.32.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.32.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.32.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.33.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.33.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.33.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.34.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.34.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.34.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.35.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.35.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.35.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.36.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.36.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.36.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.37.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.37.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.37.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.38.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.38.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.38.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.39.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.39.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.39.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.4.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.4.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.4.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.40.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.40.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.40.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.41.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.41.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.41.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.42.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.42.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.42.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.43.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.43.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.43.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.44.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.44.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.44.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.45.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.45.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.45.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.46.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.46.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.46.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.47.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.47.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.47.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.48.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.48.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.48.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.49.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.49.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.49.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.5.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.5.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.5.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.50.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.50.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.50.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.51.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.51.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.51.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.52.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.52.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.52.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.53.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.53.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.53.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.54.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.54.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.54.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.55.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.55.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.55.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.56.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.56.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.56.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.57.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.57.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.57.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.58.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.58.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.58.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.59.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.59.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.59.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.6.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.6.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.6.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.60.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.60.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.60.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.61.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.61.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.61.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.62.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.62.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.62.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.63.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.63.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.63.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.64.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.64.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.64.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.65.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.65.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.65.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.66.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.66.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.66.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.67.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.67.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.67.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.68.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.68.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.68.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.69.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.69.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.69.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.7.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.7.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.7.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.70.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.70.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.70.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.71.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.71.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.71.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.72.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.72.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.72.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.73.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.73.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.73.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.74.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.74.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.74.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.75.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.75.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.75.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.76.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.76.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.76.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.77.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.77.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.77.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.78.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.78.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.78.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.79.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.79.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.79.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.experts.8.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.8.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.8.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.9.down_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.9.gate_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.experts.9.up_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.mlp.gate.e_score_correction_bias": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.gate.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.shared_experts.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.shared_experts.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.mlp.shared_experts.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.87.post_attention_layernorm.weight": "model-00071-of-00075.safetensors", + "model.layers.87.self_attn.k_norm.weight": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.k_proj.bias": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.k_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.o_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.q_norm.weight": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.q_proj.bias": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.q_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.v_proj.bias": "model-00070-of-00075.safetensors", + "model.layers.87.self_attn.v_proj.weight": "model-00070-of-00075.safetensors", + "model.layers.88.input_layernorm.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.0.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.0.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.0.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.1.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.1.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.1.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.10.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.10.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.10.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.11.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.11.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.11.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.12.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.12.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.12.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.13.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.13.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.13.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.14.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.14.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.14.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.15.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.15.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.15.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.16.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.16.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.16.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.17.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.17.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.17.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.18.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.18.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.18.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.19.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.19.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.19.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.2.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.2.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.2.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.20.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.20.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.20.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.21.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.21.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.21.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.22.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.22.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.22.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.23.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.23.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.23.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.24.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.24.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.24.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.25.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.25.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.25.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.26.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.26.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.26.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.27.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.27.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.27.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.28.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.28.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.28.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.29.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.29.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.29.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.3.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.3.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.3.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.30.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.30.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.30.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.31.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.31.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.31.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.32.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.32.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.32.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.33.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.33.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.33.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.34.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.34.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.34.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.35.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.35.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.35.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.36.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.36.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.36.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.37.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.37.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.37.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.38.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.38.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.38.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.39.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.39.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.39.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.4.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.4.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.4.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.40.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.40.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.40.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.41.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.41.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.41.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.42.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.42.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.42.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.43.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.43.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.43.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.44.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.44.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.44.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.45.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.45.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.45.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.46.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.46.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.46.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.47.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.47.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.47.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.48.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.48.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.48.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.49.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.49.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.49.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.5.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.5.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.5.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.50.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.50.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.50.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.51.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.51.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.51.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.52.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.52.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.52.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.53.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.53.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.53.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.54.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.54.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.54.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.55.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.55.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.55.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.56.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.56.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.56.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.57.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.57.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.57.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.58.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.58.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.58.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.59.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.59.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.59.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.6.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.6.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.6.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.60.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.60.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.60.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.61.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.61.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.61.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.62.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.62.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.62.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.63.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.63.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.63.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.64.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.64.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.64.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.65.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.65.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.65.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.66.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.66.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.66.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.67.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.67.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.67.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.68.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.68.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.68.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.69.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.69.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.69.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.7.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.7.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.7.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.70.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.70.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.70.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.71.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.71.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.71.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.72.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.72.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.72.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.73.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.73.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.73.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.74.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.74.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.74.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.75.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.75.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.75.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.76.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.76.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.76.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.77.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.77.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.77.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.78.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.78.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.78.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.79.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.79.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.79.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.experts.8.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.8.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.8.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.9.down_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.9.gate_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.experts.9.up_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.mlp.gate.e_score_correction_bias": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.gate.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.shared_experts.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.shared_experts.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.mlp.shared_experts.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.88.post_attention_layernorm.weight": "model-00072-of-00075.safetensors", + "model.layers.88.self_attn.k_norm.weight": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.k_proj.bias": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.k_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.o_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.q_norm.weight": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.q_proj.bias": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.q_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.v_proj.bias": "model-00071-of-00075.safetensors", + "model.layers.88.self_attn.v_proj.weight": "model-00071-of-00075.safetensors", + "model.layers.89.input_layernorm.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.0.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.0.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.0.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.1.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.1.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.1.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.10.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.10.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.10.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.11.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.11.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.11.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.12.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.12.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.12.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.13.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.13.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.13.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.14.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.14.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.14.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.15.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.15.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.15.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.16.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.16.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.16.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.17.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.17.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.17.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.18.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.18.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.18.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.19.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.19.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.19.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.2.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.2.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.2.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.20.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.20.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.20.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.21.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.21.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.21.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.22.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.22.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.22.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.23.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.23.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.23.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.24.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.24.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.24.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.25.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.25.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.25.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.26.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.26.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.26.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.27.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.27.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.27.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.28.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.28.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.28.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.29.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.29.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.29.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.3.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.3.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.3.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.30.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.30.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.30.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.31.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.31.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.31.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.32.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.32.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.32.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.33.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.33.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.33.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.34.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.34.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.34.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.35.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.35.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.35.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.36.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.36.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.36.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.37.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.37.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.37.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.38.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.38.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.38.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.39.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.39.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.39.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.4.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.4.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.4.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.40.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.40.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.40.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.41.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.41.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.41.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.42.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.42.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.42.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.43.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.43.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.43.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.44.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.44.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.44.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.45.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.45.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.45.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.46.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.46.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.46.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.47.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.47.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.47.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.48.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.48.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.48.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.49.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.49.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.49.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.5.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.5.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.5.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.50.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.50.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.50.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.51.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.51.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.51.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.52.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.52.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.52.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.53.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.53.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.53.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.54.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.54.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.54.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.55.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.55.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.55.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.56.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.56.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.56.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.57.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.57.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.57.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.58.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.58.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.58.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.59.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.59.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.59.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.6.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.6.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.6.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.60.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.60.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.60.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.61.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.61.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.61.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.62.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.62.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.62.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.63.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.63.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.63.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.64.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.64.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.64.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.65.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.65.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.65.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.66.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.66.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.66.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.67.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.67.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.67.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.68.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.68.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.68.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.69.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.69.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.69.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.7.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.7.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.7.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.70.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.70.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.70.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.71.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.71.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.71.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.72.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.72.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.72.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.73.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.73.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.73.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.74.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.74.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.74.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.75.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.75.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.75.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.76.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.76.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.76.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.77.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.77.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.77.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.78.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.78.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.78.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.79.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.79.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.79.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.experts.8.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.8.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.8.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.9.down_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.9.gate_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.experts.9.up_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.mlp.gate.e_score_correction_bias": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.gate.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.shared_experts.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.shared_experts.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.mlp.shared_experts.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.89.post_attention_layernorm.weight": "model-00073-of-00075.safetensors", + "model.layers.89.self_attn.k_norm.weight": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.k_proj.bias": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.k_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.o_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.q_norm.weight": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.q_proj.bias": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.q_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.v_proj.bias": "model-00072-of-00075.safetensors", + "model.layers.89.self_attn.v_proj.weight": "model-00072-of-00075.safetensors", + "model.layers.9.input_layernorm.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.0.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.0.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.0.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.1.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.1.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.1.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.10.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.10.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.10.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.11.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.11.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.11.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.12.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.12.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.12.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.13.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.13.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.13.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.14.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.14.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.14.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.15.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.15.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.15.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.16.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.16.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.16.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.17.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.17.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.17.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.18.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.18.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.18.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.19.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.19.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.19.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.2.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.2.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.2.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.20.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.20.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.20.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.21.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.21.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.21.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.22.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.22.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.22.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.23.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.23.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.23.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.24.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.24.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.24.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.25.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.25.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.25.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.26.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.26.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.26.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.27.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.27.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.27.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.28.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.28.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.28.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.29.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.29.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.29.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.3.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.3.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.3.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.30.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.30.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.30.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.31.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.31.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.31.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.32.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.32.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.32.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.33.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.33.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.33.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.34.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.34.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.34.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.35.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.35.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.35.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.36.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.36.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.36.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.37.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.37.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.37.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.38.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.38.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.38.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.39.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.39.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.39.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.4.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.4.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.4.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.40.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.40.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.40.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.41.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.41.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.41.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.42.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.42.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.42.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.43.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.43.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.43.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.44.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.44.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.44.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.45.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.45.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.45.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.46.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.46.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.46.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.47.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.47.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.47.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.48.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.48.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.48.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.49.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.49.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.49.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.5.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.5.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.5.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.50.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.50.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.50.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.51.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.51.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.51.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.52.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.52.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.52.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.53.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.53.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.53.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.54.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.54.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.54.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.55.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.55.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.55.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.56.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.56.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.56.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.57.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.57.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.57.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.58.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.58.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.58.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.59.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.59.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.59.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.6.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.6.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.6.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.60.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.60.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.60.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.61.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.61.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.61.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.62.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.62.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.62.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.63.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.63.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.63.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.64.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.64.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.64.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.65.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.65.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.65.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.66.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.66.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.66.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.67.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.67.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.67.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.68.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.68.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.68.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.69.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.69.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.69.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.7.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.7.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.7.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.70.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.70.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.70.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.71.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.71.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.71.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.72.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.72.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.72.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.73.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.73.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.73.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.74.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.74.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.74.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.75.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.75.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.75.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.76.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.76.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.76.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.77.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.77.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.77.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.78.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.78.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.78.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.79.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.79.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.79.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.experts.8.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.8.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.8.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.9.down_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.9.gate_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.experts.9.up_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.mlp.gate.e_score_correction_bias": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.gate.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.shared_experts.down_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.shared_experts.gate_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.mlp.shared_experts.up_proj.weight": "model-00007-of-00075.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00007-of-00075.safetensors", + "model.layers.9.self_attn.k_norm.weight": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.k_proj.bias": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.q_norm.weight": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.q_proj.bias": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.v_proj.bias": "model-00006-of-00075.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00006-of-00075.safetensors", + "model.layers.90.input_layernorm.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.0.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.0.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.0.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.1.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.1.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.1.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.10.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.10.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.10.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.11.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.11.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.11.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.12.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.12.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.12.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.13.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.13.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.13.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.14.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.14.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.14.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.15.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.15.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.15.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.16.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.16.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.16.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.17.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.17.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.17.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.18.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.18.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.18.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.19.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.19.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.19.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.2.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.2.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.2.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.20.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.20.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.20.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.21.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.21.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.21.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.22.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.22.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.22.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.23.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.23.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.23.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.24.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.24.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.24.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.25.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.25.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.25.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.26.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.26.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.26.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.27.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.27.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.27.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.28.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.28.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.28.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.29.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.29.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.29.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.3.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.3.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.3.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.30.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.30.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.30.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.31.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.31.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.31.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.32.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.32.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.32.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.33.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.33.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.33.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.34.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.34.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.34.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.35.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.35.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.35.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.36.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.36.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.36.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.37.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.37.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.37.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.38.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.38.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.38.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.39.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.39.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.39.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.4.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.4.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.4.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.40.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.40.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.40.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.41.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.41.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.41.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.42.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.42.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.42.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.43.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.43.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.43.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.44.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.44.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.44.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.45.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.45.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.45.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.46.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.46.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.46.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.47.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.47.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.47.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.48.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.48.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.48.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.49.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.49.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.49.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.5.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.5.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.5.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.50.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.50.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.50.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.51.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.51.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.51.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.52.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.52.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.52.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.53.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.53.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.53.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.54.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.54.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.54.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.55.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.55.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.55.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.56.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.56.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.56.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.57.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.57.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.57.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.58.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.58.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.58.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.59.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.59.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.59.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.6.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.6.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.6.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.60.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.60.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.60.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.61.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.61.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.61.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.62.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.62.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.62.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.63.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.63.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.63.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.64.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.64.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.64.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.65.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.65.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.65.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.66.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.66.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.66.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.67.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.67.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.67.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.68.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.68.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.68.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.69.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.69.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.69.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.7.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.7.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.7.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.70.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.70.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.70.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.71.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.71.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.71.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.72.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.72.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.72.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.73.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.73.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.73.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.74.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.74.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.74.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.75.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.75.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.75.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.76.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.76.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.76.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.77.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.77.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.77.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.78.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.78.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.78.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.79.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.79.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.79.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.8.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.8.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.8.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.9.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.9.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.experts.9.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.gate.e_score_correction_bias": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.gate.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.shared_experts.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.shared_experts.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.mlp.shared_experts.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.post_attention_layernorm.weight": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.k_norm.weight": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.k_proj.bias": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.k_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.o_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.q_norm.weight": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.q_proj.bias": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.q_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.v_proj.bias": "model-00073-of-00075.safetensors", + "model.layers.90.self_attn.v_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.input_layernorm.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.0.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.0.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.0.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.1.down_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.1.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.1.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.10.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.10.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.10.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.11.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.11.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.11.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.12.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.12.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.12.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.13.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.13.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.13.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.14.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.14.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.14.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.15.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.15.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.15.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.16.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.16.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.16.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.17.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.17.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.17.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.18.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.18.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.18.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.19.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.19.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.19.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.2.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.2.gate_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.2.up_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.mlp.experts.20.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.20.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.20.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.21.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.21.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.21.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.22.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.22.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.22.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.23.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.23.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.23.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.24.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.24.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.24.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.25.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.25.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.25.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.26.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.26.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.26.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.27.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.27.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.27.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.28.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.28.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.28.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.29.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.29.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.29.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.3.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.3.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.3.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.30.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.30.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.30.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.31.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.31.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.31.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.32.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.32.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.32.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.33.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.33.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.33.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.34.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.34.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.34.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.35.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.35.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.35.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.36.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.36.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.36.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.37.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.37.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.37.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.38.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.38.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.38.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.39.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.39.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.39.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.4.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.4.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.4.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.40.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.40.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.40.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.41.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.41.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.41.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.42.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.42.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.42.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.43.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.43.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.43.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.44.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.44.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.44.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.45.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.45.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.45.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.46.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.46.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.46.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.47.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.47.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.47.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.48.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.48.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.48.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.49.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.49.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.49.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.5.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.5.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.5.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.50.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.50.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.50.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.51.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.51.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.51.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.52.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.52.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.52.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.53.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.53.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.53.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.54.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.54.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.54.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.55.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.55.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.55.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.56.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.56.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.56.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.57.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.57.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.57.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.58.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.58.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.58.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.59.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.59.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.59.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.6.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.6.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.6.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.60.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.60.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.60.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.61.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.61.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.61.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.62.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.62.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.62.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.63.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.63.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.63.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.64.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.64.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.64.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.65.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.65.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.65.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.66.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.66.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.66.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.67.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.67.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.67.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.68.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.68.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.68.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.69.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.69.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.69.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.7.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.7.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.7.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.70.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.70.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.70.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.71.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.71.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.71.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.72.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.72.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.72.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.73.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.73.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.73.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.74.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.74.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.74.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.75.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.75.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.75.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.76.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.76.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.76.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.77.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.77.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.77.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.78.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.78.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.78.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.79.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.79.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.79.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.8.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.8.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.8.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.9.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.9.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.experts.9.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.gate.e_score_correction_bias": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.gate.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.shared_experts.down_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.shared_experts.gate_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.mlp.shared_experts.up_proj.weight": "model-00074-of-00075.safetensors", + "model.layers.91.post_attention_layernorm.weight": "model-00074-of-00075.safetensors", + "model.layers.91.self_attn.k_norm.weight": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.k_proj.bias": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.k_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.o_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.q_norm.weight": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.q_proj.bias": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.q_proj.weight": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.v_proj.bias": "model-00073-of-00075.safetensors", + "model.layers.91.self_attn.v_proj.weight": "model-00073-of-00075.safetensors", + "model.norm.weight": "model-00074-of-00075.safetensors" + } +} diff --git a/modeling_glm4_moe.py b/modeling_glm4_moe.py new file mode 100644 index 0000000000000000000000000000000000000000..5b6567780eac691ef3204d491a4ce7bd81084a39 --- /dev/null +++ b/modeling_glm4_moe.py @@ -0,0 +1,624 @@ +# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 +# This file was automatically generated from src/transformers/models/glm4_moe/modular_glm4_moe.py. +# Do NOT edit this file manually as any edits will be overwritten by the generation of +# the file from the modular. If any change should be done, please apply the change to the +# modular_glm4_moe.py file directly. One of our CI enforces this. +# 🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨🚨 +# coding=utf-8 +# Copyright 2025 The ZhipuAI Inc. team and HuggingFace Inc. team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Callable, Optional, Union + +import torch +import torch.nn.functional as F +from torch import nn + +from transformers.activations import ACT2FN +from transformers.cache_utils import Cache, DynamicCache +from transformers.generation import GenerationMixin +from transformers.integrations import use_kernel_forward_from_hub +from transformers.masking_utils import create_causal_mask +from transformers.modeling_flash_attention_utils import FlashAttentionKwargs +from transformers.modeling_layers import GradientCheckpointingLayer +from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast +from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update +from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel +from transformers.processing_utils import Unpack +from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple +from transformers.utils.generic import check_model_inputs +from transformers.models.glm4_moe.configuration_glm4_moe import Glm4MoeConfig + + +def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor: + """ + This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch, + num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim) + """ + batch, num_key_value_heads, slen, head_dim = hidden_states.shape + if n_rep == 1: + return hidden_states + hidden_states = hidden_states[:, :, None, :, :].expand(batch, num_key_value_heads, n_rep, slen, head_dim) + return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim) + + +def eager_attention_forward( + module: nn.Module, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + attention_mask: Optional[torch.Tensor], + scaling: float, + dropout: float = 0.0, + **kwargs: Unpack[TransformersKwargs], +): + key_states = repeat_kv(key, module.num_key_value_groups) + value_states = repeat_kv(value, module.num_key_value_groups) + + attn_weights = torch.matmul(query, key_states.transpose(2, 3)) * scaling + if attention_mask is not None: + causal_mask = attention_mask[:, :, :, : key_states.shape[-2]] + attn_weights = attn_weights + causal_mask + + attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(query.dtype) + attn_weights = nn.functional.dropout(attn_weights, p=dropout, training=module.training) + attn_output = torch.matmul(attn_weights, value_states) + attn_output = attn_output.transpose(1, 2).contiguous() + + return attn_output, attn_weights + + +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + +def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): + """Applies Rotary Position Embedding to the query and key tensors. + + Args: + q (`torch.Tensor`): The query tensor. + k (`torch.Tensor`): The key tensor. + cos (`torch.Tensor`): The cosine part of the rotary embedding. + sin (`torch.Tensor`): The sine part of the rotary embedding. + position_ids (`torch.Tensor`, *optional*): + Deprecated and unused. + unsqueeze_dim (`int`, *optional*, defaults to 1): + The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. + Returns: + `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + + # Keep half or full tensor for later concatenation + rotary_dim = cos.shape[-1] + q_rot, q_pass = q[..., :rotary_dim], q[..., rotary_dim:] + k_rot, k_pass = k[..., :rotary_dim], k[..., rotary_dim:] + + # Apply rotary embeddings on the first half or full tensor + q_embed = (q_rot * cos) + (rotate_half(q_rot) * sin) + k_embed = (k_rot * cos) + (rotate_half(k_rot) * sin) + + # Concatenate back to full shape + q_embed = torch.cat([q_embed, q_pass], dim=-1) + k_embed = torch.cat([k_embed, k_pass], dim=-1) + return q_embed, k_embed + + +class Glm4MoeAttention(nn.Module): + """Multi-headed attention from 'Attention Is All You Need' paper""" + + def __init__(self, config: Glm4MoeConfig, layer_idx: Optional[int] = None): + super().__init__() + self.config = config + self.layer_idx = layer_idx + self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads) + self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads + self.scaling = self.head_dim**-0.5 + self.attention_dropout = config.attention_dropout + self.is_causal = True + + self.q_proj = nn.Linear( + config.hidden_size, config.num_attention_heads * self.head_dim, bias=config.attention_bias + ) + self.k_proj = nn.Linear( + config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias + ) + self.v_proj = nn.Linear( + config.hidden_size, config.num_key_value_heads * self.head_dim, bias=config.attention_bias + ) + self.o_proj = nn.Linear(config.num_attention_heads * self.head_dim, config.hidden_size, bias=False) + self.use_qk_norm = config.use_qk_norm + if self.use_qk_norm: + self.q_norm = Glm4MoeRMSNorm(self.head_dim, eps=config.rms_norm_eps) + self.k_norm = Glm4MoeRMSNorm(self.head_dim, eps=config.rms_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + position_embeddings: tuple[torch.Tensor, torch.Tensor], + attention_mask: Optional[torch.Tensor], + past_key_value: Optional[Cache] = None, + cache_position: Optional[torch.LongTensor] = None, + **kwargs: Unpack[FlashAttentionKwargs], + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: + input_shape = hidden_states.shape[:-1] + hidden_shape = (*input_shape, -1, self.head_dim) + + query_states = self.q_proj(hidden_states).view(hidden_shape) + key_states = self.k_proj(hidden_states).view(hidden_shape) + value_states = self.v_proj(hidden_states).view(hidden_shape) + + if self.use_qk_norm: # main diff from Llama + query_states = self.q_norm(query_states) + key_states = self.k_norm(key_states) + + query_states = query_states.transpose(1, 2) + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + cos, sin = position_embeddings + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) + + if past_key_value is not None: + # sin and cos are specific to RoPE models; position_ids needed for the static cache + cache_kwargs = {"sin": sin, "cos": cos, "cache_position": cache_position} + key_states, value_states = past_key_value.update(key_states, value_states, self.layer_idx, cache_kwargs) + + attention_interface: Callable = eager_attention_forward + if self.config._attn_implementation != "eager": + attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation] + + attn_output, attn_weights = attention_interface( + self, + query_states, + key_states, + value_states, + attention_mask, + dropout=0.0 if not self.training else self.attention_dropout, + scaling=self.scaling, + **kwargs, + ) + + attn_output = attn_output.reshape(*input_shape, -1).contiguous() + attn_output = self.o_proj(attn_output) + return attn_output, attn_weights + + +class Glm4MoeMLP(nn.Module): + def __init__(self, config, hidden_size=None, intermediate_size=None): + super().__init__() + self.config = config + self.hidden_size = config.hidden_size if hidden_size is None else hidden_size + self.intermediate_size = config.intermediate_size if intermediate_size is None else intermediate_size + + self.gate_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False) + self.up_proj = nn.Linear(self.hidden_size, self.intermediate_size, bias=False) + self.down_proj = nn.Linear(self.intermediate_size, self.hidden_size, bias=False) + self.act_fn = ACT2FN[config.hidden_act] + + def forward(self, x): + down_proj = self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + return down_proj + + +class Glm4MoeTopkRouter(nn.Module): + def __init__(self, config: Glm4MoeConfig): + super().__init__() + self.config = config + self.top_k = config.num_experts_per_tok + self.n_routed_experts = config.n_routed_experts + self.routed_scaling_factor = config.routed_scaling_factor + self.n_group = config.n_group + self.topk_group = config.topk_group + self.norm_topk_prob = config.norm_topk_prob + + self.weight = nn.Parameter(torch.empty((self.n_routed_experts, config.hidden_size))) + self.register_buffer("e_score_correction_bias", torch.zeros((self.n_routed_experts), dtype=torch.float32)) + + @torch.no_grad() + def get_topk_indices(self, scores): + scores_for_choice = scores.view(-1, self.n_routed_experts) + self.e_score_correction_bias.unsqueeze(0) + group_scores = ( + scores_for_choice.view(-1, self.n_group, self.n_routed_experts // self.n_group) + .topk(2, dim=-1)[0] + .sum(dim=-1) + ) + group_idx = torch.topk(group_scores, k=self.topk_group, dim=-1, sorted=False)[1] + group_mask = torch.zeros_like(group_scores) + group_mask.scatter_(1, group_idx, 1) + score_mask = ( + group_mask.unsqueeze(-1) + .expand(-1, self.n_group, self.n_routed_experts // self.n_group) + .reshape(-1, self.n_routed_experts) + ) + scores_for_choice = scores_for_choice.masked_fill(~score_mask.bool(), 0.0) + topk_indices = torch.topk(scores_for_choice, k=self.top_k, dim=-1, sorted=False)[1] + return topk_indices + + def forward(self, hidden_states): + hidden_states = hidden_states.view(-1, self.config.hidden_size) + router_logits = F.linear(hidden_states.type(torch.float32), self.weight.type(torch.float32)) + scores = router_logits.sigmoid() + topk_indices = self.get_topk_indices(scores) + topk_weights = scores.gather(1, topk_indices) + if self.norm_topk_prob: + denominator = topk_weights.sum(dim=-1, keepdim=True) + 1e-20 + topk_weights /= denominator + topk_weights = topk_weights * self.routed_scaling_factor + return topk_indices, topk_weights, router_logits + + +@use_kernel_forward_from_hub("RMSNorm") +class Glm4MoeRMSNorm(nn.Module): + def __init__(self, hidden_size, eps=1e-6): + """ + Glm4MoeRMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + self.weight = nn.Parameter(torch.ones(hidden_size)) + self.variance_epsilon = eps + + def forward(self, hidden_states): + input_dtype = hidden_states.dtype + hidden_states = hidden_states.to(torch.float32) + variance = hidden_states.pow(2).mean(-1, keepdim=True) + hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon) + return self.weight * hidden_states.to(input_dtype) + + def extra_repr(self): + return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}" + + +class Glm4MoeMoE(nn.Module): + """ + A mixed expert module containing shared experts. + """ + + def __init__(self, config): + super().__init__() + self.config = config + self.experts = nn.ModuleList( + [ + Glm4MoeMLP(config, intermediate_size=config.moe_intermediate_size) + for _ in range(config.n_routed_experts) + ] + ) + self.gate = Glm4MoeTopkRouter(config) + self.shared_experts = Glm4MoeMLP( + config=config, intermediate_size=config.moe_intermediate_size * config.n_shared_experts + ) + + def moe(self, hidden_states: torch.Tensor, topk_indices: torch.Tensor, topk_weights: torch.Tensor): + r""" + CALL FOR CONTRIBUTION! I don't have time to optimise this right now, but expert weights need to be fused + to not have to do a loop here (deepseek has 256 experts soooo yeah). + """ + final_hidden_states = torch.zeros_like(hidden_states, dtype=topk_weights.dtype) + expert_mask = torch.nn.functional.one_hot(topk_indices, num_classes=len(self.experts)) + expert_mask = expert_mask.permute(2, 0, 1) + + for expert_idx in range(len(self.experts)): + expert = self.experts[expert_idx] + mask = expert_mask[expert_idx] + token_indices, weight_indices = torch.where(mask) + + if token_indices.numel() > 0: + expert_weights = topk_weights[token_indices, weight_indices] + expert_input = hidden_states[token_indices] + expert_output = expert(expert_input) + weighted_output = expert_output * expert_weights.unsqueeze(-1) + final_hidden_states.index_add_(0, token_indices, weighted_output) + + # in original deepseek, the output of the experts are gathered once we leave this module + # thus the moe module is itelsf an IsolatedParallel module + # and all expert are "local" meaning we shard but we don't gather + return final_hidden_states.type(hidden_states.dtype) + + def forward(self, hidden_states): + residuals = hidden_states + orig_shape = hidden_states.shape + topk_indices, topk_weights, router_logits = self.gate(hidden_states) + hidden_states = hidden_states.view(-1, hidden_states.shape[-1]) + hidden_states = self.moe(hidden_states, topk_indices, topk_weights).view(*orig_shape) + hidden_states = hidden_states + self.shared_experts(residuals) + return hidden_states, router_logits + + +class Glm4MoeDecoderLayer(GradientCheckpointingLayer): + def __init__(self, config: Glm4MoeConfig, layer_idx: int): + super().__init__() + self.hidden_size = config.hidden_size + + self.self_attn = Glm4MoeAttention(config=config, layer_idx=layer_idx) + + if layer_idx >= config.first_k_dense_replace: + self.mlp = Glm4MoeMoE(config) + else: + self.mlp = Glm4MoeMLP(config) + + self.input_layernorm = Glm4MoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.post_attention_layernorm = Glm4MoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + use_cache: Optional[bool] = False, + cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[tuple[torch.Tensor, torch.Tensor]] = None, # necessary, but kept here for BC + **kwargs: Unpack[TransformersKwargs], + ) -> torch.Tensor: + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + # Self Attention + hidden_states, _ = self.self_attn( + hidden_states=hidden_states, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_value=past_key_value, + use_cache=use_cache, + cache_position=cache_position, + position_embeddings=position_embeddings, + **kwargs, + ) + hidden_states = residual + hidden_states + + # Fully Connected + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + mlp_output = self.mlp(hidden_states) + if len(mlp_output) == 2: + # If the MLP returns both hidden states and router logits + hidden_states, _ = mlp_output + else: + # If the MLP returns only hidden states + hidden_states = mlp_output + hidden_states = residual + hidden_states + return hidden_states + + +@auto_docstring +class Glm4MoePreTrainedModel(PreTrainedModel): + config: Glm4MoeConfig + base_model_prefix = "model" + supports_gradient_checkpointing = True + _no_split_modules = ["Glm4MoeDecoderLayer"] + _skip_keys_device_placement = ["past_key_values"] + _supports_flash_attn = True + _supports_sdpa = True + _supports_flex_attn = True + _can_compile_fullgraph = False + _supports_attention_backend = True + _can_record_outputs = { + "hidden_states": Glm4MoeDecoderLayer, + "attentions": Glm4MoeAttention, + } + + def _init_weights(self, module): + super()._init_weights(module) + if isinstance(module, Glm4MoeTopkRouter): + module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) + + +class Glm4MoeRotaryEmbedding(nn.Module): + inv_freq: torch.Tensor # fix linting for `register_buffer` + + def __init__(self, config: Glm4MoeConfig, device=None): + super().__init__() + # BC: "rope_type" was originally "type" + if hasattr(config, "rope_scaling") and isinstance(config.rope_scaling, dict): + self.rope_type = config.rope_scaling.get("rope_type", config.rope_scaling.get("type")) + else: + self.rope_type = "default" + self.max_seq_len_cached = config.max_position_embeddings + self.original_max_seq_len = config.max_position_embeddings + + self.config = config + self.rope_init_fn = ROPE_INIT_FUNCTIONS[self.rope_type] + + inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device) + self.register_buffer("inv_freq", inv_freq, persistent=False) + self.original_inv_freq = self.inv_freq + + @torch.no_grad() + @dynamic_rope_update # power user: used with advanced RoPE types (e.g. dynamic rope) + def forward(self, x, position_ids): + inv_freq_expanded = self.inv_freq[None, :, None].float().expand(position_ids.shape[0], -1, 1).to(x.device) + position_ids_expanded = position_ids[:, None, :].float() + + device_type = x.device.type if isinstance(x.device.type, str) and x.device.type != "mps" else "cpu" + with torch.autocast(device_type=device_type, enabled=False): # Force float32 + freqs = (inv_freq_expanded.float() @ position_ids_expanded.float()).transpose(1, 2) + emb = torch.cat((freqs, freqs), dim=-1) + cos = emb.cos() * self.attention_scaling + sin = emb.sin() * self.attention_scaling + + return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype) + + +@auto_docstring +class Glm4MoeModel(Glm4MoePreTrainedModel): + _keys_to_ignore_on_load_unexpected = [r"model\.layers\.92.*", r"model\.layers\.46.*"] + + def __init__(self, config: Glm4MoeConfig): + super().__init__(config) + self.padding_idx = config.pad_token_id + self.vocab_size = config.vocab_size + + self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.padding_idx) + self.layers = nn.ModuleList( + [Glm4MoeDecoderLayer(config, layer_idx) for layer_idx in range(config.num_hidden_layers)] + ) + self.norm = Glm4MoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.rotary_emb = Glm4MoeRotaryEmbedding(config=config) + self.gradient_checkpointing = False + + # Initialize weights and apply final processing + self.post_init() + + @check_model_inputs + @auto_docstring + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Cache] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + cache_position: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + **kwargs: Unpack[TransformersKwargs], + ) -> BaseModelOutputWithPast: + if (input_ids is None) ^ (inputs_embeds is not None): + raise ValueError("You must specify exactly one of input_ids or inputs_embeds") + + if inputs_embeds is None: + inputs_embeds: torch.Tensor = self.embed_tokens(input_ids) + + if use_cache and past_key_values is None: + past_key_values = DynamicCache() + + if cache_position is None: + past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0 + cache_position: torch.Tensor = torch.arange( + past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device + ) + + if position_ids is None: + position_ids = cache_position.unsqueeze(0) + + causal_mask = create_causal_mask( + config=self.config, + input_embeds=inputs_embeds, + attention_mask=attention_mask, + cache_position=cache_position, + past_key_values=past_key_values, + position_ids=position_ids, + ) + + hidden_states = inputs_embeds + position_embeddings = self.rotary_emb(hidden_states, position_ids) + + for decoder_layer in self.layers[: self.config.num_hidden_layers]: + hidden_states = decoder_layer( + hidden_states, + attention_mask=causal_mask, + position_ids=position_ids, + past_key_value=past_key_values, + cache_position=cache_position, + position_embeddings=position_embeddings, + **kwargs, + ) + + hidden_states = self.norm(hidden_states) + return BaseModelOutputWithPast( + last_hidden_state=hidden_states, + past_key_values=past_key_values, + ) + + +@auto_docstring +class Glm4MoeForCausalLM(Glm4MoePreTrainedModel, GenerationMixin): + _tied_weights_keys = ["lm_head.weight"] + _tp_plan = {"lm_head": "colwise_rep"} + _pp_plan = {"lm_head": (["hidden_states"], ["logits"])} + + def __init__(self, config): + super().__init__(config) + self.model = Glm4MoeModel(config) + self.vocab_size = config.vocab_size + self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) + + # Initialize weights and apply final processing + self.post_init() + + def set_decoder(self, decoder): + self.model = decoder + + def get_decoder(self): + return self.model + + @can_return_tuple + @auto_docstring + def forward( + self, + input_ids: Optional[torch.LongTensor] = None, + attention_mask: Optional[torch.Tensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_values: Optional[Cache] = None, + inputs_embeds: Optional[torch.FloatTensor] = None, + labels: Optional[torch.LongTensor] = None, + use_cache: Optional[bool] = None, + cache_position: Optional[torch.LongTensor] = None, + logits_to_keep: Union[int, torch.Tensor] = 0, + **kwargs: Unpack[TransformersKwargs], + ) -> CausalLMOutputWithPast: + r""" + Example: + + ```python + >>> from transformers import AutoTokenizer, Glm4MoeForCausalLM + + >>> model = Glm4MoeForCausalLM.from_pretrained("meta-glm4_moe/Glm4Moe-2-7b-hf") + >>> tokenizer = AutoTokenizer.from_pretrained("meta-glm4_moe/Glm4Moe-2-7b-hf") + + >>> prompt = "Hey, are you conscious? Can you talk to me?" + >>> inputs = tokenizer(prompt, return_tensors="pt") + + >>> # Generate + >>> generate_ids = model.generate(inputs.input_ids, max_length=30) + >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] + "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you." + ```""" + outputs: BaseModelOutputWithPast = self.model( + input_ids=input_ids, + attention_mask=attention_mask, + position_ids=position_ids, + past_key_values=past_key_values, + inputs_embeds=inputs_embeds, + use_cache=use_cache, + cache_position=cache_position, + **kwargs, + ) + + hidden_states = outputs.last_hidden_state + # Only compute necessary logits, and do not upcast them to float if we are not computing the loss + slice_indices = slice(-logits_to_keep, None) if isinstance(logits_to_keep, int) else logits_to_keep + logits = self.lm_head(hidden_states[:, slice_indices, :]) + + loss = None + if labels is not None: + loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs) + + return CausalLMOutputWithPast( + loss=loss, + logits=logits, + past_key_values=outputs.past_key_values, + hidden_states=outputs.hidden_states, + attentions=outputs.attentions, + ) + + +__all__ = ["Glm4MoePreTrainedModel", "Glm4MoeModel", "Glm4MoeForCausalLM"] \ No newline at end of file diff --git a/reap_args.yaml b/reap_args.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d4bc732a932148789cd0bcab5fbfbd42b75fe030 --- /dev/null +++ b/reap_args.yaml @@ -0,0 +1,76 @@ +cluster_args: + cluster_description: null + cluster_method: agglomerative + compression_ratio: 0.5 + expert_sim: ttm + frequency_penalty: true + linkage_method: average + max_cluster_size: null + multi_layer: null + num_clusters: null + singleton_outlier_experts: false + singleton_super_experts: false + softmax_temperature: null +ds_args: + dataset_config_name: all + dataset_name: theblackcat102/evol-codealpaca-v1 + dataset_test_split: test + shuffle: true + split: train +eval_args: + evalplus_tasks: + - mbpp + - humaneval + greedy: true + lm_eval_tasks: + - winogrande + - arc_challenge + - arc_easy + - boolq + - hellaswag + - mmlu + - openbookqa + - rte + min_p: 0.0 + parallel_tasks: 32 + results_dir: null + run_evalplus: true + run_livecodebench: true + run_lm_eval: true + run_math: false + run_wildbench: false + server_log_file_name: pruning-cli-0.log + temperature: 0.7 + top_k: 20 + top_p: 0.8 + use_server: true + vllm_port: 8000 +model_args: + model_name: zai-org/GLM-4.6 + num_experts_per_tok_override: null +obs_args: + distance_measure: cosine + model_max_length: 2048 + output_file_name: observations_10_cosine-seed_42.pt + overwrite_observations: false + record_pruning_metrics_only: true + renormalize_router_weights: false + return_vllm_tokens_prompt: false + samples_per_category: 10 + select_only_categories: null + split_by_category: false + truncate: false +prune_args: + n_experts_to_prune: null + overwrite_pruned_model: false + perserve_outliers: false + perserve_super_experts: false + prune_method: reap +reap_args: + debug: false + do_eval: false + plot_clusters: true + profile: false + run_observer_only: false + seed: 42 + smoke_test: true diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9028cf84013844f17d7616bdec1d88e977924434 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,40 @@ +{ + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "eos_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..e3ed3c66baf1ec4de61840b0abf02142687bfed8 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bda8e2146c3bb7b7e0fc96dcc4f0aeff041c6c27952e3ace0665663ebff346ba +size 19970700 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..75e11cfb2e0cc09f19391ec2278b4825a4c3fae9 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,325 @@ +{ + "added_tokens_decoder": { + "151329": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151330": { + "content": "[MASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151331": { + "content": "[gMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151332": { + "content": "[sMASK]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151333": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151334": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151335": { + "content": "<|system|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151336": { + "content": "<|user|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151337": { + "content": "<|assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151338": { + "content": "<|observation|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151339": { + "content": "<|begin_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151340": { + "content": "<|end_of_image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151341": { + "content": "<|begin_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151342": { + "content": "<|end_of_video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151343": { + "content": "<|begin_of_audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151344": { + "content": "<|end_of_audio|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151345": { + "content": "<|begin_of_transcription|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151346": { + "content": "<|end_of_transcription|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151347": { + "content": "<|code_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151348": { + "content": "<|code_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151349": { + "content": "<|code_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151350": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151351": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151352": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151353": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151354": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151355": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151356": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151357": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151358": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151359": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151360": { + "content": "/nothink", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151361": { + "content": "<|begin_of_box|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151362": { + "content": "<|end_of_box|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151363": { + "content": "<|image|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151364": { + "content": "<|video|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|endoftext|>", + "[MASK]", + "[gMASK]", + "[sMASK]", + "", + "", + "<|system|>", + "<|user|>", + "<|assistant|>", + "<|observation|>", + "<|begin_of_image|>", + "<|end_of_image|>", + "<|begin_of_video|>", + "<|end_of_video|>", + "<|begin_of_audio|>", + "<|end_of_audio|>", + "<|begin_of_transcription|>", + "<|end_of_transcription|>", + "<|code_prefix|>", + "<|code_middle|>", + "<|code_suffix|>", + "/nothink" + ], + "clean_up_tokenization_spaces": false, + "do_lower_case": false, + "eos_token": "<|endoftext|>", + "extra_special_tokens": {}, + "model_max_length": 128000, + "pad_token": "<|endoftext|>", + "padding_side": "left", + "remove_space": false, + "tokenizer_class": "PreTrainedTokenizerFast" +}