It works!

Browse files

Files changed (12) hide show

added_tokens.json +103 -0
config.json +28 -31
flax_model.msgpack +0 -3
generation_config.json +1 -1
model.safetensors +2 -2
pytorch_model-00001-of-00002.bin +0 -3
pytorch_model-00002-of-00002.bin +0 -3
pytorch_model.bin.index.json +0 -751
special_tokens_map.json +21 -3
spiece.model +2 -2
tokenizer.json +2 -2
tokenizer_config.json +826 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,103 @@

+{
+  "<extra_id_0>": 256099,
+  "<extra_id_10>": 256089,
+  "<extra_id_11>": 256088,
+  "<extra_id_12>": 256087,
+  "<extra_id_13>": 256086,
+  "<extra_id_14>": 256085,
+  "<extra_id_15>": 256084,
+  "<extra_id_16>": 256083,
+  "<extra_id_17>": 256082,
+  "<extra_id_18>": 256081,
+  "<extra_id_19>": 256080,
+  "<extra_id_1>": 256098,
+  "<extra_id_20>": 256079,
+  "<extra_id_21>": 256078,
+  "<extra_id_22>": 256077,
+  "<extra_id_23>": 256076,
+  "<extra_id_24>": 256075,
+  "<extra_id_25>": 256074,
+  "<extra_id_26>": 256073,
+  "<extra_id_27>": 256072,
+  "<extra_id_28>": 256071,
+  "<extra_id_29>": 256070,
+  "<extra_id_2>": 256097,
+  "<extra_id_30>": 256069,
+  "<extra_id_31>": 256068,
+  "<extra_id_32>": 256067,
+  "<extra_id_33>": 256066,
+  "<extra_id_34>": 256065,
+  "<extra_id_35>": 256064,
+  "<extra_id_36>": 256063,
+  "<extra_id_37>": 256062,
+  "<extra_id_38>": 256061,
+  "<extra_id_39>": 256060,
+  "<extra_id_3>": 256096,
+  "<extra_id_40>": 256059,
+  "<extra_id_41>": 256058,
+  "<extra_id_42>": 256057,
+  "<extra_id_43>": 256056,
+  "<extra_id_44>": 256055,
+  "<extra_id_45>": 256054,
+  "<extra_id_46>": 256053,
+  "<extra_id_47>": 256052,
+  "<extra_id_48>": 256051,
+  "<extra_id_49>": 256050,
+  "<extra_id_4>": 256095,
+  "<extra_id_50>": 256049,
+  "<extra_id_51>": 256048,
+  "<extra_id_52>": 256047,
+  "<extra_id_53>": 256046,
+  "<extra_id_54>": 256045,
+  "<extra_id_55>": 256044,
+  "<extra_id_56>": 256043,
+  "<extra_id_57>": 256042,
+  "<extra_id_58>": 256041,
+  "<extra_id_59>": 256040,
+  "<extra_id_5>": 256094,
+  "<extra_id_60>": 256039,
+  "<extra_id_61>": 256038,
+  "<extra_id_62>": 256037,
+  "<extra_id_63>": 256036,
+  "<extra_id_64>": 256035,
+  "<extra_id_65>": 256034,
+  "<extra_id_66>": 256033,
+  "<extra_id_67>": 256032,
+  "<extra_id_68>": 256031,
+  "<extra_id_69>": 256030,
+  "<extra_id_6>": 256093,
+  "<extra_id_70>": 256029,
+  "<extra_id_71>": 256028,
+  "<extra_id_72>": 256027,
+  "<extra_id_73>": 256026,
+  "<extra_id_74>": 256025,
+  "<extra_id_75>": 256024,
+  "<extra_id_76>": 256023,
+  "<extra_id_77>": 256022,
+  "<extra_id_78>": 256021,
+  "<extra_id_79>": 256020,
+  "<extra_id_7>": 256092,
+  "<extra_id_80>": 256019,
+  "<extra_id_81>": 256018,
+  "<extra_id_82>": 256017,
+  "<extra_id_83>": 256016,
+  "<extra_id_84>": 256015,
+  "<extra_id_85>": 256014,
+  "<extra_id_86>": 256013,
+  "<extra_id_87>": 256012,
+  "<extra_id_88>": 256011,
+  "<extra_id_89>": 256010,
+  "<extra_id_8>": 256091,
+  "<extra_id_90>": 256009,
+  "<extra_id_91>": 256008,
+  "<extra_id_92>": 256007,
+  "<extra_id_93>": 256006,
+  "<extra_id_94>": 256005,
+  "<extra_id_95>": 256004,
+  "<extra_id_96>": 256003,
+  "<extra_id_97>": 256002,
+  "<extra_id_98>": 256001,
+  "<extra_id_99>": 256000,
+  "<extra_id_9>": 256090,
+  "<pad>": 256100
+}

config.json CHANGED Viewed

@@ -1,32 +1,29 @@
 {
-  "architectures": [
-    "T5ForConditionalGeneration"
-  ],
-  "classifier_dropout": 0.0,
-  "d_ff": 8192,
-  "d_kv": 128,
-  "d_model": 1024,
-  "decoder_start_token_id": 1,
-  "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
-  "eos_token_id": 2,
-  "feed_forward_proj": "gated-gelu",
-  "initializer_factor": 1.0,
-  "is_encoder_decoder": true,
-  "is_gated_act": true,
-  "layer_norm_epsilon": 1e-06,
-  "model_type": "t5",
-  "n_positions": 512,
-  "num_decoder_layers": 32,
-  "num_heads": 16,
-  "num_layers": 32,
-  "output_past": true,
-  "pad_token_id": 1,
-  "relative_attention_max_distance": 128,
-  "relative_attention_num_buckets": 32,
-  "task_specific_params": {},
-  "tie_word_embeddings": false,
-  "transformers_version": "4.33.2",
-  "use_cache": true,
-  "vocab_size": 256000
-}

 {
+    "architectures": [
+        "T5ForConditionalGeneration"
+    ],
+    "d_ff": 8192,
+    "d_kv": 128,
+    "d_model": 1024,
+    "dropout_rate": 0.1,
+    "decoder_start_token_id": 1,
+    "pad_token_id": 1,
+    "eos_token_id": 2,
+    "feed_forward_proj": "gated-gelu",
+    "initializer_factor": 1.0,
+    "is_encoder_decoder": true,
+    "layer_norm_epsilon": 1e-06,
+    "model_type": "t5",
+    "n_positions": 512,
+    "num_decoder_layers": 32,
+    "num_heads": 16,
+    "num_layers": 32,
+    "output_past": true,
+    "relative_attention_max_distance": 128,
+    "relative_attention_num_buckets": 32,
+    "task_specific_params": {},
+    "tie_word_embeddings": false,
+    "transformers_version": "4.23.1",
+    "use_cache": true,
+    "vocab_size": 256000
+}

flax_model.msgpack DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:9dc94c6dc47a8e24d50b810d3ece0ae3b78b66ef310d053557b4ffe8ad6b1b77
-size 11761528083

generation_config.json CHANGED Viewed

@@ -3,5 +3,5 @@
   "decoder_start_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 1,
-  "transformers_version": "4.33.2"
 }

   "decoder_start_token_id": 1,
   "eos_token_id": 2,
   "pad_token_id": 1,
+  "transformers_version": "4.35.0"
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ddca188b8a3db3666eae1b7176f9bbd20a6b378efc3dbc2a823564c5b9d8e7c
-size 12810163832

 version https://git-lfs.github.com/spec/v1
+oid sha256:66ff5f8fcaf92291da486fdfbd4d5233cec90e1359348a56e3172c978b3a76d4
+size 11761587872

pytorch_model-00001-of-00002.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6a28403955d7d7699a5745c2b6e4f02eee8962d85d5e4b98e509329c72b9f607
-size 9974907514

pytorch_model-00002-of-00002.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:3368d731cf5b286d309f44ccae713abdc074ac71668b0a13ba533968f4ad6972
-size 1786851801

pytorch_model.bin.index.json DELETED Viewed

@@ -1,751 +0,0 @@
-{
-  "metadata": {
-    "total_size": 11761496064
-  },
-  "weight_map": {
-    "decoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.0.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.1.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.10.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.11.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.12.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.13.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.14.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.15.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.16.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.17.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.18.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.19.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.2.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.20.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.21.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.22.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.23.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.24.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.25.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.26.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.27.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.27.layer.2.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.27.layer.2.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.1.EncDecAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.1.EncDecAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.1.EncDecAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.1.EncDecAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.2.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.28.layer.2.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.1.EncDecAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.1.EncDecAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.1.EncDecAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.1.EncDecAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.2.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.29.layer.2.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.3.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.30.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.1.EncDecAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.1.EncDecAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.1.EncDecAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.1.EncDecAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.2.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.30.layer.2.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.0.SelfAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.0.SelfAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.0.SelfAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.0.SelfAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.0.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.1.EncDecAttention.k.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.1.EncDecAttention.o.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.1.EncDecAttention.q.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.1.EncDecAttention.v.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.1.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.2.DenseReluDense.wo.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.31.layer.2.layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "decoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.4.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.5.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.6.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.7.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.8.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.1.EncDecAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.1.EncDecAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.1.EncDecAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.1.EncDecAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.2.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.2.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.2.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.block.9.layer.2.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
-    "decoder.final_layer_norm.weight": "pytorch_model-00002-of-00002.bin",
-    "encoder.block.0.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.0.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.1.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.10.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.11.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.12.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.13.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.14.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.15.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.16.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.17.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.18.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.19.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.2.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.20.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.21.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.22.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.23.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.24.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.25.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.26.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.27.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.28.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.29.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.3.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.30.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.31.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.4.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.5.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.6.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.7.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.8.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.0.SelfAttention.k.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.0.SelfAttention.o.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.0.SelfAttention.q.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.0.SelfAttention.v.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.0.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.1.DenseReluDense.wi_0.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.1.DenseReluDense.wi_1.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.1.DenseReluDense.wo.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.block.9.layer.1.layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
-    "encoder.final_layer_norm.weight": "pytorch_model-00001-of-00002.bin",
-    "lm_head.weight": "pytorch_model-00002-of-00002.bin",
-    "shared.weight": "pytorch_model-00001-of-00002.bin"
-  }
-}

special_tokens_map.json CHANGED Viewed

@@ -101,7 +101,25 @@
     "<extra_id_98>",
     "<extra_id_99>"
   ],
-  "eos_token": "</s>",
-  "pad_token": "<pad>",
-  "unk_token": "<unk>"
 }

     "<extra_id_98>",
     "<extra_id_99>"
   ],
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

spiece.model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df65ad942bf9646454eaaff5bdb08e155b6b4e105ae8e2dac96d0ebb2e455073
-size 2148928

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef11ac9a22c7503492f56d48dce53be20e339b63605983e9f27d2cd0e0f3922c
+size 4427844

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15aa9b3cbcf2ec220c6044f898e27fd302067a336df5369ec55a8f385f849d3b
-size 8236606

 version https://git-lfs.github.com/spec/v1
+oid sha256:8ceb4f73d0ecf9131e8fbf80b7b5261c202f85b69d3b24f822bf59249b16cccc
+size 16634421

tokenizer_config.json CHANGED Viewed

@@ -1,4 +1,830 @@
 {
   "additional_special_tokens": [
     "<extra_id_0>",
     "<extra_id_1>",

 {
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "256000": {
+      "content": "<extra_id_99>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256001": {
+      "content": "<extra_id_98>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256002": {
+      "content": "<extra_id_97>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256003": {
+      "content": "<extra_id_96>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256004": {
+      "content": "<extra_id_95>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256005": {
+      "content": "<extra_id_94>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256006": {
+      "content": "<extra_id_93>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256007": {
+      "content": "<extra_id_92>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256008": {
+      "content": "<extra_id_91>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256009": {
+      "content": "<extra_id_90>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256010": {
+      "content": "<extra_id_89>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256011": {
+      "content": "<extra_id_88>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256012": {
+      "content": "<extra_id_87>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256013": {
+      "content": "<extra_id_86>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256014": {
+      "content": "<extra_id_85>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256015": {
+      "content": "<extra_id_84>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256016": {
+      "content": "<extra_id_83>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256017": {
+      "content": "<extra_id_82>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256018": {
+      "content": "<extra_id_81>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256019": {
+      "content": "<extra_id_80>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256020": {
+      "content": "<extra_id_79>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256021": {
+      "content": "<extra_id_78>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256022": {
+      "content": "<extra_id_77>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256023": {
+      "content": "<extra_id_76>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256024": {
+      "content": "<extra_id_75>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256025": {
+      "content": "<extra_id_74>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256026": {
+      "content": "<extra_id_73>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256027": {
+      "content": "<extra_id_72>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256028": {
+      "content": "<extra_id_71>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256029": {
+      "content": "<extra_id_70>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256030": {
+      "content": "<extra_id_69>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256031": {
+      "content": "<extra_id_68>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256032": {
+      "content": "<extra_id_67>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256033": {
+      "content": "<extra_id_66>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256034": {
+      "content": "<extra_id_65>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256035": {
+      "content": "<extra_id_64>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256036": {
+      "content": "<extra_id_63>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256037": {
+      "content": "<extra_id_62>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256038": {
+      "content": "<extra_id_61>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256039": {
+      "content": "<extra_id_60>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256040": {
+      "content": "<extra_id_59>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256041": {
+      "content": "<extra_id_58>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256042": {
+      "content": "<extra_id_57>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256043": {
+      "content": "<extra_id_56>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256044": {
+      "content": "<extra_id_55>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256045": {
+      "content": "<extra_id_54>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256046": {
+      "content": "<extra_id_53>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256047": {
+      "content": "<extra_id_52>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256048": {
+      "content": "<extra_id_51>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256049": {
+      "content": "<extra_id_50>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256050": {
+      "content": "<extra_id_49>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256051": {
+      "content": "<extra_id_48>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256052": {
+      "content": "<extra_id_47>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256053": {
+      "content": "<extra_id_46>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256054": {
+      "content": "<extra_id_45>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256055": {
+      "content": "<extra_id_44>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256056": {
+      "content": "<extra_id_43>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256057": {
+      "content": "<extra_id_42>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256058": {
+      "content": "<extra_id_41>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256059": {
+      "content": "<extra_id_40>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256060": {
+      "content": "<extra_id_39>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256061": {
+      "content": "<extra_id_38>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256062": {
+      "content": "<extra_id_37>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256063": {
+      "content": "<extra_id_36>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256064": {
+      "content": "<extra_id_35>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256065": {
+      "content": "<extra_id_34>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256066": {
+      "content": "<extra_id_33>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256067": {
+      "content": "<extra_id_32>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256068": {
+      "content": "<extra_id_31>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256069": {
+      "content": "<extra_id_30>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256070": {
+      "content": "<extra_id_29>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256071": {
+      "content": "<extra_id_28>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256072": {
+      "content": "<extra_id_27>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256073": {
+      "content": "<extra_id_26>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256074": {
+      "content": "<extra_id_25>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256075": {
+      "content": "<extra_id_24>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256076": {
+      "content": "<extra_id_23>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256077": {
+      "content": "<extra_id_22>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256078": {
+      "content": "<extra_id_21>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256079": {
+      "content": "<extra_id_20>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256080": {
+      "content": "<extra_id_19>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256081": {
+      "content": "<extra_id_18>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256082": {
+      "content": "<extra_id_17>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256083": {
+      "content": "<extra_id_16>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256084": {
+      "content": "<extra_id_15>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256085": {
+      "content": "<extra_id_14>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256086": {
+      "content": "<extra_id_13>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256087": {
+      "content": "<extra_id_12>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256088": {
+      "content": "<extra_id_11>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256089": {
+      "content": "<extra_id_10>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256090": {
+      "content": "<extra_id_9>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256091": {
+      "content": "<extra_id_8>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256092": {
+      "content": "<extra_id_7>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256093": {
+      "content": "<extra_id_6>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256094": {
+      "content": "<extra_id_5>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256095": {
+      "content": "<extra_id_4>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256096": {
+      "content": "<extra_id_3>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256097": {
+      "content": "<extra_id_2>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256098": {
+      "content": "<extra_id_1>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256099": {
+      "content": "<extra_id_0>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": true,
+      "special": true
+    },
+    "256100": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "additional_special_tokens": [
     "<extra_id_0>",
     "<extra_id_1>",