Lyaaaaaaaaaaaaaaa committed on May 6, 2023

Commit

6ab8c7a

1 Parent(s): 206ca26

Upload 42 files

Browse files

Files changed (42) hide show

config.json +82 -0
generation_config.json +7 -0
merges.txt +0 -0
pytorch_model-00001-of-00034.bin +3 -0
pytorch_model-00002-of-00034.bin +3 -0
pytorch_model-00003-of-00034.bin +3 -0
pytorch_model-00004-of-00034.bin +3 -0
pytorch_model-00005-of-00034.bin +3 -0
pytorch_model-00006-of-00034.bin +3 -0
pytorch_model-00007-of-00034.bin +3 -0
pytorch_model-00008-of-00034.bin +3 -0
pytorch_model-00009-of-00034.bin +3 -0
pytorch_model-00010-of-00034.bin +3 -0
pytorch_model-00011-of-00034.bin +3 -0
pytorch_model-00012-of-00034.bin +3 -0
pytorch_model-00013-of-00034.bin +3 -0
pytorch_model-00014-of-00034.bin +3 -0
pytorch_model-00015-of-00034.bin +3 -0
pytorch_model-00016-of-00034.bin +3 -0
pytorch_model-00017-of-00034.bin +3 -0
pytorch_model-00018-of-00034.bin +3 -0
pytorch_model-00019-of-00034.bin +3 -0
pytorch_model-00020-of-00034.bin +3 -0
pytorch_model-00021-of-00034.bin +3 -0
pytorch_model-00022-of-00034.bin +3 -0
pytorch_model-00023-of-00034.bin +3 -0
pytorch_model-00024-of-00034.bin +3 -0
pytorch_model-00025-of-00034.bin +3 -0
pytorch_model-00026-of-00034.bin +3 -0
pytorch_model-00027-of-00034.bin +3 -0
pytorch_model-00028-of-00034.bin +3 -0
pytorch_model-00029-of-00034.bin +3 -0
pytorch_model-00030-of-00034.bin +3 -0
pytorch_model-00031-of-00034.bin +3 -0
pytorch_model-00032-of-00034.bin +3 -0
pytorch_model-00033-of-00034.bin +3 -0
pytorch_model-00034-of-00034.bin +3 -0
pytorch_model.bin.index.json +491 -0
special_tokens_map.json +5 -0
tokenizer.json +0 -0
tokenizer_config.json +33 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_name_or_path": "KoboldAI/GPT-Neo-2.7B-Horni-LN",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPTNeoForCausalLM"
+  ],
+  "attention_dropout": 0,
+  "attention_layers": [
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local"
+  ],
+  "attention_types": [
+    [
+      [
+        "global",
+        "local"
+      ],
+      16
+    ]
+  ],
+  "bos_token_id": 50256,
+  "embed_dropout": 0,
+  "eos_token_id": 50256,
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": null,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "gpt_neo",
+  "num_heads": 20,
+  "num_layers": 32,
+  "rep_pen": 2.0,
+  "resid_dropout": 0,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50,
+      "temperature": 0.9
+    }
+  },
+  "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float16",
+  "transformers_version": "4.27.4",
+  "use_cache": false,
+  "vocab_size": 50257,
+  "window_size": 256
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.27.4",
+  "use_cache": false
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model-00001-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f713a462ff20e675c62e450a60f51aed6f65430a165f82b4f709b539f4342361
+size 537

pytorch_model-00002-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42205b9e9763bd8223acfd8ce72c45bdd1e276b6a3a8a60fbe04e50c58d2245e
+size 257316778

pytorch_model-00003-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20339514539991f66aad688aaeda1c0d2610f51e4d9fa653a0ab1f5460ae6f51
+size 202443995

pytorch_model-00004-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0fe9e125415f75350d2db554e34a441ef9e569058638f97686af5bced56c8929
+size 187768439

pytorch_model-00005-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7855816d582c49da39fe5598f8909dd5f9debbc68d705531deab740ca149708
+size 161537181

pytorch_model-00006-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d7b5520796bc522af163bcdb862790d5cac782b380fca4425c162f2c2b65d20
+size 161537181

pytorch_model-00007-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f99c96c0e57fa36b91f739921b078a0bd4b53e928c174ea2691b21357e1132f2
+size 161537181

pytorch_model-00008-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76ee6e1ea4ba83c9437084c782fdbdd0a99df1737efedd2959f21f6594b61f1f
+size 161537181

pytorch_model-00009-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24f83608da9385532f3a7870c98856cbc95681d6c44737be8532bdc0f7930784
+size 161537181

pytorch_model-00010-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:15adc83793dc53acfe5877406f68ce39480e743ceee14e1d6e4505888730b0ed
+size 161537181

pytorch_model-00011-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:98ac6206044702595198d73b15cc87640d5e7c66c62c89fa4023cd739851d0a6
+size 161537181

pytorch_model-00012-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ad4e8898662281f695e45108ca95431efd5432b1f970d48b3c1e7aeeb966f293
+size 161537181

pytorch_model-00013-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcc65a15a4dcc28bd70a50b8c4db3627494b747bd5a82720c77b23e665871aa1
+size 161537181

pytorch_model-00014-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d51e95bee6d15079e181008c5f7452b9fee7515bd1e9a74d708e03d762e0ea1d
+size 161537181

pytorch_model-00015-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae65ae822cd7d83b655bc60f38a8604266a9de0904f41fa04d53874b1dde089c
+size 161537181

pytorch_model-00016-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d16c7df701a0853dec8e5670ba005b9cc4dcfd250bcdd1006daaa74313464ccb
+size 161537181

pytorch_model-00017-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:990719f8ff27c0f5a0a5017055a292f4ffaa484692fdfc5af3764b1882346957
+size 161537181

pytorch_model-00018-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f314811e47f5620fafac4378a5dc2725dd36f3ffa0ae06591fa3275b02a9fb51
+size 161537181

pytorch_model-00019-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2cdd872fba952925ca9d2daeae187cefbd262ffad1dc2897cbbe220a3c8af1c6
+size 161537181

pytorch_model-00020-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44bfcff9db4f1232d27c75f3040a4fcef8ce467e2509eabb3d747c5f4da90cd0
+size 161537181

pytorch_model-00021-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:acb430ed8dea95c6929eb9848924100ebd7868ce16c3c1ff3a31b282461cac65
+size 161537181

pytorch_model-00022-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74d432cfea232bdf24e5ed6949868a799a7e4aa2eccaf54ad1180d5f6a85afac
+size 161537181

pytorch_model-00023-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c33061c1e4e7032c5cda7fed4ff241cb8bbdc8f02442fc0f488e3ca6b4e40a6
+size 161537181

pytorch_model-00024-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3b48ad29f623e4ccbbe238b4679f9ac42494cf4227d260522094151d5ba94e0
+size 161537181

pytorch_model-00025-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ecb61b6b18bc1f40f9293385969c57c4c3dfca5e2276505cf9f24d534df20418
+size 161537181

pytorch_model-00026-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a3301b6897267a9dcfdba7d990a46d7a8af159f68081d4113faea7fe22196a8
+size 161537181

pytorch_model-00027-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8723ec6752f2c6dc41b90369a79d1b726856185fc0ff61d240d0fe03cd02b65
+size 161537181

pytorch_model-00028-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36f7768f9b8d60b875488300b67bf080211e6526aae89737978bb17bac853113
+size 161537181

pytorch_model-00029-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2d1d2ce7fc7aa659642d2bc3097a4970e8a9cf62aa24cb2695630a93e2d83a86
+size 161537181

pytorch_model-00030-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bc21f4ac9a1522412f77d94c5776695947e3984d4232488df915a6c819e1b21
+size 161537181

pytorch_model-00031-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed6e1394b12888c897654c12668237d266177829fee5b41bf2d9103e379db82b
+size 161537181

pytorch_model-00032-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9d470e1b7df20e2cfbdd7a2c3570d7f28e1da6dffafea843365dbbe7dde5eb6
+size 161537181

pytorch_model-00033-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:431f0d3803c4726461dd3fe8db85d48fb0edd263e32e23a6de804d0fce53e0d9
+size 161537181

pytorch_model-00034-of-00034.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be6c181fffcf44a9bf1439013faaf339e9c171e1b9a5a5966611e372d4d91450
+size 104895871

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,491 @@

+{
+  "metadata": {
+    "total_size": 5319392320.0
+  },
+  "weight_map": {
+    "transformer.h.0.attn.attention.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.k_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.masked_bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.out_proj.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.out_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.q_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.attn.attention.v_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_1.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_1.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_2.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.ln_2.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_fc.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_fc.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_proj.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.0.mlp.c_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.k_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.masked_bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.attn.attention.out_proj.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.attn.attention.out_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.attn.attention.q_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.attn.attention.v_proj.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.ln_1.bias": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.ln_1.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.h.1.ln_2.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.ln_2.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_fc.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_fc.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_proj.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.1.mlp.c_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.10.attn.attention.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.k_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.masked_bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.out_proj.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.out_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.q_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.attn.attention.v_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_1.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_1.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_2.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.ln_2.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.10.mlp.c_fc.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.10.mlp.c_fc.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.10.mlp.c_proj.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.10.mlp.c_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.k_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.masked_bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.out_proj.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.out_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.q_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.attn.attention.v_proj.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_1.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_1.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_2.bias": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.ln_2.weight": "pytorch_model-00013-of-00034.bin",
+    "transformer.h.11.mlp.c_fc.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.11.mlp.c_fc.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.11.mlp.c_proj.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.11.mlp.c_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.k_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.masked_bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.out_proj.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.out_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.q_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.attn.attention.v_proj.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_1.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_1.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_2.bias": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.ln_2.weight": "pytorch_model-00014-of-00034.bin",
+    "transformer.h.12.mlp.c_fc.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.12.mlp.c_fc.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.12.mlp.c_proj.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.12.mlp.c_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.k_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.masked_bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.out_proj.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.out_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.q_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.attn.attention.v_proj.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_1.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_1.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_2.bias": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.ln_2.weight": "pytorch_model-00015-of-00034.bin",
+    "transformer.h.13.mlp.c_fc.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.13.mlp.c_fc.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.13.mlp.c_proj.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.13.mlp.c_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.k_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.masked_bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.out_proj.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.out_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.q_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.attn.attention.v_proj.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_1.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_1.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_2.bias": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.ln_2.weight": "pytorch_model-00016-of-00034.bin",
+    "transformer.h.14.mlp.c_fc.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.14.mlp.c_fc.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.14.mlp.c_proj.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.14.mlp.c_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.k_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.masked_bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.out_proj.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.out_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.q_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.attn.attention.v_proj.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_1.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_1.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_2.bias": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.ln_2.weight": "pytorch_model-00017-of-00034.bin",
+    "transformer.h.15.mlp.c_fc.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.15.mlp.c_fc.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.15.mlp.c_proj.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.15.mlp.c_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.k_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.masked_bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.out_proj.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.out_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.q_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.attn.attention.v_proj.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_1.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_1.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_2.bias": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.ln_2.weight": "pytorch_model-00018-of-00034.bin",
+    "transformer.h.16.mlp.c_fc.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.16.mlp.c_fc.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.16.mlp.c_proj.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.16.mlp.c_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.k_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.masked_bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.out_proj.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.out_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.q_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.attn.attention.v_proj.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_1.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_1.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_2.bias": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.ln_2.weight": "pytorch_model-00019-of-00034.bin",
+    "transformer.h.17.mlp.c_fc.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.17.mlp.c_fc.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.17.mlp.c_proj.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.17.mlp.c_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.k_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.masked_bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.out_proj.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.out_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.q_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.attn.attention.v_proj.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_1.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_1.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_2.bias": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.ln_2.weight": "pytorch_model-00020-of-00034.bin",
+    "transformer.h.18.mlp.c_fc.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.18.mlp.c_fc.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.18.mlp.c_proj.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.18.mlp.c_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.k_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.masked_bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.out_proj.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.out_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.q_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.attn.attention.v_proj.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_1.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_1.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_2.bias": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.ln_2.weight": "pytorch_model-00021-of-00034.bin",
+    "transformer.h.19.mlp.c_fc.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.19.mlp.c_fc.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.19.mlp.c_proj.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.19.mlp.c_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.2.attn.attention.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.k_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.masked_bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.out_proj.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.out_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.q_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.attn.attention.v_proj.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_1.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_1.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_2.bias": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.ln_2.weight": "pytorch_model-00004-of-00034.bin",
+    "transformer.h.2.mlp.c_fc.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.2.mlp.c_fc.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.2.mlp.c_proj.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.2.mlp.c_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.20.attn.attention.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.k_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.masked_bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.out_proj.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.out_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.q_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.attn.attention.v_proj.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_1.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_1.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_2.bias": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.ln_2.weight": "pytorch_model-00022-of-00034.bin",
+    "transformer.h.20.mlp.c_fc.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.20.mlp.c_fc.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.20.mlp.c_proj.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.20.mlp.c_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.k_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.masked_bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.out_proj.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.out_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.q_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.attn.attention.v_proj.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_1.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_1.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_2.bias": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.ln_2.weight": "pytorch_model-00023-of-00034.bin",
+    "transformer.h.21.mlp.c_fc.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.21.mlp.c_fc.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.21.mlp.c_proj.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.21.mlp.c_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.k_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.masked_bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.out_proj.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.out_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.q_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.attn.attention.v_proj.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_1.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_1.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_2.bias": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.ln_2.weight": "pytorch_model-00024-of-00034.bin",
+    "transformer.h.22.mlp.c_fc.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.22.mlp.c_fc.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.22.mlp.c_proj.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.22.mlp.c_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.k_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.masked_bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.out_proj.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.out_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.q_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.attn.attention.v_proj.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_1.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_1.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_2.bias": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.ln_2.weight": "pytorch_model-00025-of-00034.bin",
+    "transformer.h.23.mlp.c_fc.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.23.mlp.c_fc.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.23.mlp.c_proj.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.23.mlp.c_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.k_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.masked_bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.out_proj.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.out_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.q_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.attn.attention.v_proj.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_1.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_1.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_2.bias": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.ln_2.weight": "pytorch_model-00026-of-00034.bin",
+    "transformer.h.24.mlp.c_fc.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.24.mlp.c_fc.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.24.mlp.c_proj.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.24.mlp.c_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.k_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.masked_bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.out_proj.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.out_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.q_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.attn.attention.v_proj.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_1.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_1.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_2.bias": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.ln_2.weight": "pytorch_model-00027-of-00034.bin",
+    "transformer.h.25.mlp.c_fc.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.25.mlp.c_fc.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.25.mlp.c_proj.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.25.mlp.c_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.k_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.masked_bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.out_proj.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.out_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.q_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.attn.attention.v_proj.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_1.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_1.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_2.bias": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.ln_2.weight": "pytorch_model-00028-of-00034.bin",
+    "transformer.h.26.mlp.c_fc.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.26.mlp.c_fc.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.26.mlp.c_proj.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.26.mlp.c_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.k_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.masked_bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.out_proj.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.out_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.q_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.attn.attention.v_proj.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_1.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_1.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_2.bias": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.ln_2.weight": "pytorch_model-00029-of-00034.bin",
+    "transformer.h.27.mlp.c_fc.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.27.mlp.c_fc.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.27.mlp.c_proj.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.27.mlp.c_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.k_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.masked_bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.out_proj.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.out_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.q_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.attn.attention.v_proj.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_1.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_1.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_2.bias": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.ln_2.weight": "pytorch_model-00030-of-00034.bin",
+    "transformer.h.28.mlp.c_fc.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.28.mlp.c_fc.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.28.mlp.c_proj.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.28.mlp.c_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.k_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.masked_bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.out_proj.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.out_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.q_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.attn.attention.v_proj.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_1.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_1.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_2.bias": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.ln_2.weight": "pytorch_model-00031-of-00034.bin",
+    "transformer.h.29.mlp.c_fc.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.29.mlp.c_fc.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.29.mlp.c_proj.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.29.mlp.c_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.3.attn.attention.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.k_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.masked_bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.out_proj.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.out_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.q_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.attn.attention.v_proj.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_1.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_1.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_2.bias": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.ln_2.weight": "pytorch_model-00005-of-00034.bin",
+    "transformer.h.3.mlp.c_fc.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.3.mlp.c_fc.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.3.mlp.c_proj.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.3.mlp.c_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.30.attn.attention.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.k_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.masked_bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.out_proj.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.out_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.q_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.attn.attention.v_proj.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_1.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_1.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_2.bias": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.ln_2.weight": "pytorch_model-00032-of-00034.bin",
+    "transformer.h.30.mlp.c_fc.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.30.mlp.c_fc.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.30.mlp.c_proj.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.30.mlp.c_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.k_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.masked_bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.out_proj.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.out_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.q_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.attn.attention.v_proj.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_1.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_1.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_2.bias": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.ln_2.weight": "pytorch_model-00033-of-00034.bin",
+    "transformer.h.31.mlp.c_fc.bias": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.31.mlp.c_fc.weight": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.31.mlp.c_proj.bias": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.31.mlp.c_proj.weight": "pytorch_model-00034-of-00034.bin",
+    "transformer.h.4.attn.attention.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.k_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.masked_bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.out_proj.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.out_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.q_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.attn.attention.v_proj.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_1.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_1.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_2.bias": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.ln_2.weight": "pytorch_model-00006-of-00034.bin",
+    "transformer.h.4.mlp.c_fc.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.4.mlp.c_fc.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.4.mlp.c_proj.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.4.mlp.c_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.k_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.masked_bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.out_proj.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.out_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.q_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.attn.attention.v_proj.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_1.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_1.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_2.bias": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.ln_2.weight": "pytorch_model-00007-of-00034.bin",
+    "transformer.h.5.mlp.c_fc.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.5.mlp.c_fc.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.5.mlp.c_proj.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.5.mlp.c_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.k_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.masked_bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.out_proj.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.out_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.q_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.attn.attention.v_proj.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_1.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_1.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_2.bias": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.ln_2.weight": "pytorch_model-00008-of-00034.bin",
+    "transformer.h.6.mlp.c_fc.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.6.mlp.c_fc.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.6.mlp.c_proj.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.6.mlp.c_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.k_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.masked_bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.out_proj.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.out_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.q_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.attn.attention.v_proj.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_1.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_1.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_2.bias": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.ln_2.weight": "pytorch_model-00009-of-00034.bin",
+    "transformer.h.7.mlp.c_fc.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.7.mlp.c_fc.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.7.mlp.c_proj.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.7.mlp.c_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.k_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.masked_bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.out_proj.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.out_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.q_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.attn.attention.v_proj.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_1.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_1.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_2.bias": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.ln_2.weight": "pytorch_model-00010-of-00034.bin",
+    "transformer.h.8.mlp.c_fc.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.8.mlp.c_fc.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.8.mlp.c_proj.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.8.mlp.c_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.k_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.masked_bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.out_proj.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.out_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.q_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.attn.attention.v_proj.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_1.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_1.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_2.bias": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.ln_2.weight": "pytorch_model-00011-of-00034.bin",
+    "transformer.h.9.mlp.c_fc.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.9.mlp.c_fc.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.9.mlp.c_proj.bias": "pytorch_model-00012-of-00034.bin",
+    "transformer.h.9.mlp.c_proj.weight": "pytorch_model-00012-of-00034.bin",
+    "transformer.ln_f.bias": "pytorch_model-00034-of-00034.bin",
+    "transformer.ln_f.weight": "pytorch_model-00034-of-00034.bin",
+    "transformer.wpe.weight": "pytorch_model-00003-of-00034.bin",
+    "transformer.wte.weight": "pytorch_model-00002-of-00034.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 2048,
+  "pad_token": null,
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff