Lyaaaaaaaaaaaaaaa committed on May 6, 2023

Commit

659cb41

1 Parent(s): 6267c56

Upload 39 files

Browse files

Files changed (39) hide show

config.json +74 -0
generation_config.json +6 -0
merges.txt +0 -0
pytorch_model-00001-of-00031.bin +3 -0
pytorch_model-00002-of-00031.bin +3 -0
pytorch_model-00003-of-00031.bin +3 -0
pytorch_model-00004-of-00031.bin +3 -0
pytorch_model-00005-of-00031.bin +3 -0
pytorch_model-00006-of-00031.bin +3 -0
pytorch_model-00007-of-00031.bin +3 -0
pytorch_model-00008-of-00031.bin +3 -0
pytorch_model-00009-of-00031.bin +3 -0
pytorch_model-00010-of-00031.bin +3 -0
pytorch_model-00011-of-00031.bin +3 -0
pytorch_model-00012-of-00031.bin +3 -0
pytorch_model-00013-of-00031.bin +3 -0
pytorch_model-00014-of-00031.bin +3 -0
pytorch_model-00015-of-00031.bin +3 -0
pytorch_model-00016-of-00031.bin +3 -0
pytorch_model-00017-of-00031.bin +3 -0
pytorch_model-00018-of-00031.bin +3 -0
pytorch_model-00019-of-00031.bin +3 -0
pytorch_model-00020-of-00031.bin +3 -0
pytorch_model-00021-of-00031.bin +3 -0
pytorch_model-00022-of-00031.bin +3 -0
pytorch_model-00023-of-00031.bin +3 -0
pytorch_model-00024-of-00031.bin +3 -0
pytorch_model-00025-of-00031.bin +3 -0
pytorch_model-00026-of-00031.bin +3 -0
pytorch_model-00027-of-00031.bin +3 -0
pytorch_model-00028-of-00031.bin +3 -0
pytorch_model-00029-of-00031.bin +3 -0
pytorch_model-00030-of-00031.bin +3 -0
pytorch_model-00031-of-00031.bin +3 -0
pytorch_model.bin.index.json +371 -0
special_tokens_map.json +5 -0
tokenizer.json +0 -0
tokenizer_config.json +33 -0
vocab.json +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,74 @@

+{
+  "_name_or_path": "EleutherAI/gpt-neo-1.3B",
+  "activation_function": "gelu_new",
+  "architectures": [
+    "GPTNeoForCausalLM"
+  ],
+  "attention_dropout": 0,
+  "attention_layers": [
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local",
+    "global",
+    "local"
+  ],
+  "attention_types": [
+    [
+      [
+        "global",
+        "local"
+      ],
+      12
+    ]
+  ],
+  "bos_token_id": 50256,
+  "embed_dropout": 0,
+  "eos_token_id": 50256,
+  "gradient_checkpointing": false,
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": null,
+  "layer_norm_epsilon": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "gpt_neo",
+  "num_heads": 16,
+  "num_layers": 24,
+  "resid_dropout": 0,
+  "summary_activation": null,
+  "summary_first_dropout": 0.1,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "task_specific_params": {
+    "text-generation": {
+      "do_sample": true,
+      "max_length": 50,
+      "temperature": 0.9
+    }
+  },
+  "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.27.4",
+  "use_cache": true,
+  "vocab_size": 50257,
+  "window_size": 256
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.27.4"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

pytorch_model-00001-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4280a7cc4f42f284347fa8f824fdf72f9ffe8a43bb0393ce06f4fae297d94ca
+size 537

pytorch_model-00002-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9c43adebcaeba2a31fdccb35caf93963174396bd47547ded0a13e22e147f9dc
+size 411706282

pytorch_model-00003-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:45553940048cc53d0781b40cc8f645d4fee6fd3ae6de4e9b2a2598bc1fe24109
+size 155267915

pytorch_model-00004-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3b135594d54df7d9d2bb77b036ce8a1ced3b89f44fed6a71c7b466295d6d302
+size 138465849

pytorch_model-00005-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:04218cb2b673e74db3216ab11e717d9365970def998c6a54ead8526d90d1b511
+size 188805077

pytorch_model-00006-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:93f2d1f03d4b236ba48cf0b263e00bc5907c81da292ffb603cdffe7cceb456b0
+size 188830539

pytorch_model-00007-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d9acdbdf5991a38e05bee1649c29328eae6a559c18313726df81ae4ca9e31d90
+size 188830539

pytorch_model-00008-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df3e8614b0ff4b5005d33e1eaf97c623c2a59c64fa4eb3eb84070c3afff212bc
+size 188830539

pytorch_model-00009-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f05552dc8705fc05b2abf093110de9313d1d418fe85317fc4e025d6027135e
+size 134278450

pytorch_model-00010-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ea30745293725fc39cee6cef304b049b959671b1545c8336929237591c783b98
+size 138465849

pytorch_model-00011-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95f069107ca7778acf454a8543181ef2e07309d47644a34f7456108984803634
+size 188805077

pytorch_model-00012-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:647305554a9d1dbef6573cc3acd2830579e370f722c25fc9a84f662370d3e0b1
+size 188830539

pytorch_model-00013-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c76ce6b5ce092ea5beb1e021c58464a2a35b50993ec8def5dca6a16e0379bcf
+size 188830539

pytorch_model-00014-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e81951aeb8e00e7afa43025fe8830eb01091d86d1f9de9ee87916dcc83ca4c9a
+size 188830539

pytorch_model-00015-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:897ec9024b4ea00372b87f3a784e4fb143619e7c09dd7994bf758abb847ac496
+size 134278514

pytorch_model-00016-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:22c6c8102233cb87384c5c1443e905344d9199f885f59db5465e7962495194c8
+size 138465849

pytorch_model-00017-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ae0dd1e6e58c7bbdb430c24317a789cead78c3e8d2409a33b1b2562c92132f1
+size 188805077

pytorch_model-00018-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4633d86b1d769e2dd77ed1aca6786dbb9d24d12344d4f4d9bf93968090face83
+size 188830539

pytorch_model-00019-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25ca0e44cefb890a2647d8e20c0012f92f3f78b92525aa188e9330fe194830b7
+size 188830539

pytorch_model-00020-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a1b7b0b0a2ffcfeaa1b253ea76a4297410e42c8dea7f5239611ff41cc9931c6
+size 188830539

pytorch_model-00021-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c328635d3f91089e52bb5da7f4ee0d9b954b3952320f4434919257ca6d3a0fac
+size 134278514

pytorch_model-00022-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7996f7751e496013a4a18c7a667bb583a1948a223df91df5145f82852ba1567
+size 138465849

pytorch_model-00023-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:207cfe58e7dfda4364d0ada64e4546195486f77f5ed0024fe2aee66914df47cd
+size 188805077

pytorch_model-00024-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3afd7fa67dc29441a48b580d5ed67262d3d7dfcb27d8c1303ee58bdbb6ca50fd
+size 188830539

pytorch_model-00025-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4bac36d008a697f193244088e2b293782bbc6582583cb0b2484331b310e6ea22
+size 188830539

pytorch_model-00026-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c85a3a4e222eb89314d74a935c28b8afc525a9e3322a48a2365bb58adeba9ab4
+size 188830539

pytorch_model-00027-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9650b8d2d7ee36dc1b04f70ed81bcb81bb0fae2070c5f7fe4681219c171186b6
+size 134278514

pytorch_model-00028-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e00e9210894975a6b4b782c9a1b8706283d7242ebc92fd6ed176efb008fa863e
+size 138465849

pytorch_model-00029-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c76299a017127f4ad860d1139e7afb5536ac768eae2c71eb1b224928cdbe5694
+size 188805077

pytorch_model-00030-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48717e59039c2f105b532ce8f7a7b5dc5c7eedbb8a7b674adabc9cd66b23ca7f
+size 188830539

pytorch_model-00031-of-00031.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7a1466dbfdb656462c50f30e8b5668c0b387c97a60723ce54ee99776b64aed2b
+size 167858069

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,371 @@

+{
+  "metadata": {
+    "total_size": 5274886240.0
+  },
+  "weight_map": {
+    "transformer.h.0.attn.attention.bias": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.attn.attention.k_proj.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.attn.attention.masked_bias": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.attn.attention.out_proj.bias": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.attn.attention.out_proj.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.attn.attention.q_proj.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.attn.attention.v_proj.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.ln_1.bias": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.ln_1.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.ln_2.bias": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.ln_2.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.mlp.c_fc.bias": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.mlp.c_fc.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.h.0.mlp.c_proj.bias": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.0.mlp.c_proj.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.bias": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.k_proj.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.masked_bias": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.out_proj.bias": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.out_proj.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.q_proj.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.attn.attention.v_proj.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.ln_1.bias": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.ln_1.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.ln_2.bias": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.ln_2.weight": "pytorch_model-00004-of-00031.bin",
+    "transformer.h.1.mlp.c_fc.bias": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.1.mlp.c_fc.weight": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.1.mlp.c_proj.bias": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.1.mlp.c_proj.weight": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.10.attn.attention.bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.10.attn.attention.k_proj.weight": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.attn.attention.masked_bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.10.attn.attention.out_proj.bias": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.attn.attention.out_proj.weight": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.attn.attention.q_proj.weight": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.attn.attention.v_proj.weight": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.ln_1.bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.10.ln_1.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.10.ln_2.bias": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.ln_2.weight": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.mlp.c_fc.bias": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.mlp.c_fc.weight": "pytorch_model-00015-of-00031.bin",
+    "transformer.h.10.mlp.c_proj.bias": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.10.mlp.c_proj.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.bias": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.k_proj.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.masked_bias": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.out_proj.bias": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.out_proj.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.q_proj.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.attn.attention.v_proj.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.ln_1.bias": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.ln_1.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.ln_2.bias": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.ln_2.weight": "pytorch_model-00016-of-00031.bin",
+    "transformer.h.11.mlp.c_fc.bias": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.11.mlp.c_fc.weight": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.11.mlp.c_proj.bias": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.11.mlp.c_proj.weight": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.attn.attention.bias": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.attn.attention.k_proj.weight": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.attn.attention.masked_bias": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.attn.attention.out_proj.bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.attn.attention.out_proj.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.attn.attention.q_proj.weight": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.attn.attention.v_proj.weight": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.ln_1.bias": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.ln_1.weight": "pytorch_model-00017-of-00031.bin",
+    "transformer.h.12.ln_2.bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.ln_2.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.mlp.c_fc.bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.mlp.c_fc.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.mlp.c_proj.bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.12.mlp.c_proj.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.attn.attention.bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.attn.attention.k_proj.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.attn.attention.masked_bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.attn.attention.out_proj.bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.attn.attention.out_proj.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.attn.attention.q_proj.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.attn.attention.v_proj.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.ln_1.bias": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.ln_1.weight": "pytorch_model-00018-of-00031.bin",
+    "transformer.h.13.ln_2.bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.ln_2.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.mlp.c_fc.bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.mlp.c_fc.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.mlp.c_proj.bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.13.mlp.c_proj.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.14.attn.attention.bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.14.attn.attention.k_proj.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.14.attn.attention.masked_bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.14.attn.attention.out_proj.bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.attn.attention.out_proj.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.attn.attention.q_proj.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.attn.attention.v_proj.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.ln_1.bias": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.14.ln_1.weight": "pytorch_model-00019-of-00031.bin",
+    "transformer.h.14.ln_2.bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.ln_2.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.mlp.c_fc.bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.mlp.c_fc.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.mlp.c_proj.bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.14.mlp.c_proj.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.15.attn.attention.bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.15.attn.attention.k_proj.weight": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.attn.attention.masked_bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.15.attn.attention.out_proj.bias": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.attn.attention.out_proj.weight": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.attn.attention.q_proj.weight": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.attn.attention.v_proj.weight": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.ln_1.bias": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.15.ln_1.weight": "pytorch_model-00020-of-00031.bin",
+    "transformer.h.15.ln_2.bias": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.ln_2.weight": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.mlp.c_fc.bias": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.mlp.c_fc.weight": "pytorch_model-00021-of-00031.bin",
+    "transformer.h.15.mlp.c_proj.bias": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.15.mlp.c_proj.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.bias": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.k_proj.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.masked_bias": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.out_proj.bias": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.out_proj.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.q_proj.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.attn.attention.v_proj.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.ln_1.bias": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.ln_1.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.ln_2.bias": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.ln_2.weight": "pytorch_model-00022-of-00031.bin",
+    "transformer.h.16.mlp.c_fc.bias": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.16.mlp.c_fc.weight": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.16.mlp.c_proj.bias": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.16.mlp.c_proj.weight": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.attn.attention.bias": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.attn.attention.k_proj.weight": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.attn.attention.masked_bias": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.attn.attention.out_proj.bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.attn.attention.out_proj.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.attn.attention.q_proj.weight": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.attn.attention.v_proj.weight": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.ln_1.bias": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.ln_1.weight": "pytorch_model-00023-of-00031.bin",
+    "transformer.h.17.ln_2.bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.ln_2.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.mlp.c_fc.bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.mlp.c_fc.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.mlp.c_proj.bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.17.mlp.c_proj.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.attn.attention.bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.attn.attention.k_proj.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.attn.attention.masked_bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.attn.attention.out_proj.bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.attn.attention.out_proj.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.attn.attention.q_proj.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.attn.attention.v_proj.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.ln_1.bias": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.ln_1.weight": "pytorch_model-00024-of-00031.bin",
+    "transformer.h.18.ln_2.bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.ln_2.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.mlp.c_fc.bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.mlp.c_fc.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.mlp.c_proj.bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.18.mlp.c_proj.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.19.attn.attention.bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.19.attn.attention.k_proj.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.19.attn.attention.masked_bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.19.attn.attention.out_proj.bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.attn.attention.out_proj.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.attn.attention.q_proj.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.attn.attention.v_proj.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.ln_1.bias": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.19.ln_1.weight": "pytorch_model-00025-of-00031.bin",
+    "transformer.h.19.ln_2.bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.ln_2.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.mlp.c_fc.bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.mlp.c_fc.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.mlp.c_proj.bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.19.mlp.c_proj.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.2.attn.attention.bias": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.attn.attention.k_proj.weight": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.attn.attention.masked_bias": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.attn.attention.out_proj.bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.attn.attention.out_proj.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.attn.attention.q_proj.weight": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.attn.attention.v_proj.weight": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.ln_1.bias": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.ln_1.weight": "pytorch_model-00005-of-00031.bin",
+    "transformer.h.2.ln_2.bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.ln_2.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.mlp.c_fc.bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.mlp.c_fc.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.mlp.c_proj.bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.2.mlp.c_proj.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.20.attn.attention.bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.20.attn.attention.k_proj.weight": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.attn.attention.masked_bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.20.attn.attention.out_proj.bias": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.attn.attention.out_proj.weight": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.attn.attention.q_proj.weight": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.attn.attention.v_proj.weight": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.ln_1.bias": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.20.ln_1.weight": "pytorch_model-00026-of-00031.bin",
+    "transformer.h.20.ln_2.bias": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.ln_2.weight": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.mlp.c_fc.bias": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.mlp.c_fc.weight": "pytorch_model-00027-of-00031.bin",
+    "transformer.h.20.mlp.c_proj.bias": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.20.mlp.c_proj.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.bias": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.k_proj.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.masked_bias": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.out_proj.bias": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.out_proj.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.q_proj.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.attn.attention.v_proj.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.ln_1.bias": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.ln_1.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.ln_2.bias": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.ln_2.weight": "pytorch_model-00028-of-00031.bin",
+    "transformer.h.21.mlp.c_fc.bias": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.21.mlp.c_fc.weight": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.21.mlp.c_proj.bias": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.21.mlp.c_proj.weight": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.attn.attention.bias": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.attn.attention.k_proj.weight": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.attn.attention.masked_bias": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.attn.attention.out_proj.bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.attn.attention.out_proj.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.attn.attention.q_proj.weight": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.attn.attention.v_proj.weight": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.ln_1.bias": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.ln_1.weight": "pytorch_model-00029-of-00031.bin",
+    "transformer.h.22.ln_2.bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.ln_2.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.mlp.c_fc.bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.mlp.c_fc.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.mlp.c_proj.bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.22.mlp.c_proj.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.attn.attention.bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.attn.attention.k_proj.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.attn.attention.masked_bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.attn.attention.out_proj.bias": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.attn.attention.out_proj.weight": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.attn.attention.q_proj.weight": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.attn.attention.v_proj.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.ln_1.bias": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.ln_1.weight": "pytorch_model-00030-of-00031.bin",
+    "transformer.h.23.ln_2.bias": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.ln_2.weight": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.mlp.c_fc.bias": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.mlp.c_fc.weight": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.mlp.c_proj.bias": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.23.mlp.c_proj.weight": "pytorch_model-00031-of-00031.bin",
+    "transformer.h.3.attn.attention.bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.3.attn.attention.k_proj.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.3.attn.attention.masked_bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.3.attn.attention.out_proj.bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.attn.attention.out_proj.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.attn.attention.q_proj.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.attn.attention.v_proj.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.3.ln_1.bias": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.3.ln_1.weight": "pytorch_model-00006-of-00031.bin",
+    "transformer.h.3.ln_2.bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.ln_2.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.mlp.c_fc.bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.mlp.c_fc.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.mlp.c_proj.bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.3.mlp.c_proj.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.4.attn.attention.bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.4.attn.attention.k_proj.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.4.attn.attention.masked_bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.4.attn.attention.out_proj.bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.attn.attention.out_proj.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.attn.attention.q_proj.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.attn.attention.v_proj.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.ln_1.bias": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.4.ln_1.weight": "pytorch_model-00007-of-00031.bin",
+    "transformer.h.4.ln_2.bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.ln_2.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.mlp.c_fc.bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.mlp.c_fc.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.mlp.c_proj.bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.4.mlp.c_proj.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.5.attn.attention.bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.5.attn.attention.k_proj.weight": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.attn.attention.masked_bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.5.attn.attention.out_proj.bias": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.attn.attention.out_proj.weight": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.attn.attention.q_proj.weight": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.attn.attention.v_proj.weight": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.ln_1.bias": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.5.ln_1.weight": "pytorch_model-00008-of-00031.bin",
+    "transformer.h.5.ln_2.bias": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.ln_2.weight": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.mlp.c_fc.bias": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.mlp.c_fc.weight": "pytorch_model-00009-of-00031.bin",
+    "transformer.h.5.mlp.c_proj.bias": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.5.mlp.c_proj.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.bias": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.k_proj.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.masked_bias": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.out_proj.bias": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.out_proj.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.q_proj.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.attn.attention.v_proj.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.ln_1.bias": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.ln_1.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.ln_2.bias": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.ln_2.weight": "pytorch_model-00010-of-00031.bin",
+    "transformer.h.6.mlp.c_fc.bias": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.6.mlp.c_fc.weight": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.6.mlp.c_proj.bias": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.6.mlp.c_proj.weight": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.attn.attention.bias": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.attn.attention.k_proj.weight": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.attn.attention.masked_bias": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.attn.attention.out_proj.bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.attn.attention.out_proj.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.attn.attention.q_proj.weight": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.attn.attention.v_proj.weight": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.ln_1.bias": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.ln_1.weight": "pytorch_model-00011-of-00031.bin",
+    "transformer.h.7.ln_2.bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.ln_2.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.mlp.c_fc.bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.mlp.c_fc.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.mlp.c_proj.bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.7.mlp.c_proj.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.attn.attention.bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.attn.attention.k_proj.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.attn.attention.masked_bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.attn.attention.out_proj.bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.attn.attention.out_proj.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.attn.attention.q_proj.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.attn.attention.v_proj.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.ln_1.bias": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.ln_1.weight": "pytorch_model-00012-of-00031.bin",
+    "transformer.h.8.ln_2.bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.ln_2.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.mlp.c_fc.bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.mlp.c_fc.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.mlp.c_proj.bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.8.mlp.c_proj.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.9.attn.attention.bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.9.attn.attention.k_proj.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.9.attn.attention.masked_bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.9.attn.attention.out_proj.bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.attn.attention.out_proj.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.attn.attention.q_proj.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.attn.attention.v_proj.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.ln_1.bias": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.9.ln_1.weight": "pytorch_model-00013-of-00031.bin",
+    "transformer.h.9.ln_2.bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.ln_2.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.mlp.c_fc.bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.mlp.c_fc.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.mlp.c_proj.bias": "pytorch_model-00014-of-00031.bin",
+    "transformer.h.9.mlp.c_proj.weight": "pytorch_model-00014-of-00031.bin",
+    "transformer.ln_f.bias": "pytorch_model-00031-of-00031.bin",
+    "transformer.ln_f.weight": "pytorch_model-00031-of-00031.bin",
+    "transformer.wpe.weight": "pytorch_model-00003-of-00031.bin",
+    "transformer.wte.weight": "pytorch_model-00002-of-00031.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 2048,
+  "pad_token": null,
+  "special_tokens_map_file": null,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff