README.md CHANGED
@@ -1,3 +1,76 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Giga-Embeddings-instruct
2
+
3
+
4
+ Эта модель имеет 27 слоев, а размер эмбеддинга составляет 2048.
5
+
6
+ ## Использование
7
+
8
+ Ниже приведен пример кодирования запросов и текстов.
9
+
10
+
11
+ ### Transformers
12
+
13
+ ```python
14
+ import os
15
+ import torch
16
+ import torch.nn.functional as F
17
+ from transformers import AutoTokenizer, AutoModel
18
+
19
+ # Each query needs to be accompanied by a corresponding instruction describing the task.
20
+ task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
21
+
22
+ query_prefix = task_name_to_instruct["example"] + "\nquery: "
23
+ queries = [
24
+ 'are judo throws allowed in wrestling?',
25
+ 'how to become a radiology technician in michigan?'
26
+ ]
27
+
28
+ # No instruction needed for retrieval passages
29
+ passage_prefix = ""
30
+ passages = [
31
+ "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
32
+ "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
33
+ ]
34
+
35
+ # load model with tokenizer
36
+ model_path = os.getcwd()  # TODO: set to the directory containing the model files (the current working directory is used here)
37
+ model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
38
+
39
+ # get the embeddings
40
+ query_embeddings = model.encode(queries, instruction=query_prefix)
41
+ passage_embeddings = model.encode(passages, instruction=passage_prefix)
42
+
43
+ # normalize embeddings
44
+ query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
45
+ passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)
46
+
47
+ scores = (query_embeddings @ passage_embeddings.T) * 100
48
+ print(scores.tolist())
49
+ ```
50
+
51
+ ## Поддерживаемые языки
52
+
53
+ Эта модель инициализирована pretrain моделью GigaChat и дополнительно обучена на смеси английских и русских данных. Однако, поскольку pretrain GigaChat'a делался в основном на русскоязычных данных, мы рекомендуем использовать эту модель только для русского языка.
54
+
55
+ ## FAQ
56
+
57
+ 1. Нужно ли добавлять инструкции к запросу?
58
+
59
+ Да, именно так модель обучалась; без инструкций качество эмбеддингов снизится. Инструкция должна описывать задачу одним предложением. Это способ настраивать текстовые эмбеддинги под разные сценарии с помощью инструкций на естественном языке.
60
+
61
+ Пожалуйста, ознакомьтесь с (## TODO link to instructions) для инструкций, которые мы использовали для замеров.
62
+
63
+ С другой стороны, добавлять инструкции на сторону документа не требуется.
64
+
65
+ 2. Почему мои воспроизведённые результаты немного отличаются от указанных в карточке модели?
66
+
67
+ Разные версии библиотек transformers и pytorch могут вызывать незначительные, но ненулевые различия в производительности.
68
+
69
+
70
+ ## Ограничения
71
+
72
+ Использование этой модели для входных данных, содержащих более 4096 токенов, невозможно.
73
+
74
+ ## Лицензия
75
+
76
+ MIT
config.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/jovyan/ekolodin/models/gigarembed-release-v1/config.json",
3
+ "add_eos": true,
4
+ "add_pad_token": true,
5
+ "architectures": [
6
+ "GigarEmbedModel"
7
+ ],
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_gigarembed.GigarEmbedConfig",
10
+ "AutoModel": "modeling_gigarembed.GigarEmbedModel"
11
+ },
12
+ "hidden_size": 2048,
13
+ "is_mask_instruction": false,
14
+ "latent_attention_config": {
15
+ "cross_dim_head": 2048,
16
+ "hidden_dim": 2048,
17
+ "latent_dim": 2048,
18
+ "model_type": "latent_attention"
19
+ },
20
+ "mask_type": "b",
21
+ "model_type": "gigarembed",
22
+ "padding_side": "right",
23
+ "text_config": {
24
+ "_name_or_path": "/home/jovyan/ekolodin/models/gigarembed-release-v1/",
25
+ "activation_checkpoint_layers_num": null,
26
+ "add_cross_attention": false,
27
+ "architectures": [
28
+ "LlamaForCausalLM"
29
+ ],
30
+ "attention_bias": false,
31
+ "attention_dropout": 0.0,
32
+ "attention_hidden_size": null,
33
+ "attention_type": "LlamaPackedAttention",
34
+ "bad_words_ids": null,
35
+ "begin_suppress_tokens": null,
36
+ "bos_token_id": 1,
37
+ "chunk_size_feed_forward": 0,
38
+ "cross_attention_hidden_size": null,
39
+ "decoder_start_token_id": null,
40
+ "deterministic_attention": false,
41
+ "diversity_penalty": 0.0,
42
+ "do_sample": false,
43
+ "early_stopping": false,
44
+ "encoder_no_repeat_ngram_size": 0,
45
+ "eos_token_id": 2,
46
+ "exponential_decay_length_penalty": null,
47
+ "finetuning_task": null,
48
+ "forced_bos_token_id": null,
49
+ "forced_eos_token_id": null,
50
+ "freeze_non_embed": false,
51
+ "fused_mlp": true,
52
+ "fused_mlp_checkpoint_lvl": 3,
53
+ "head_dim": 128,
54
+ "hidden_act": "silu",
55
+ "hidden_size": 2048,
56
+ "id2label": {
57
+ "0": "LABEL_0",
58
+ "1": "LABEL_1"
59
+ },
60
+ "init_device": "meta",
61
+ "initializer_range": 0.02,
62
+ "intermediate_size": 11008,
63
+ "is_decoder": false,
64
+ "is_encoder_decoder": false,
65
+ "label2id": {
66
+ "LABEL_0": 0,
67
+ "LABEL_1": 1
68
+ },
69
+ "length_penalty": 1.0,
70
+ "loss_inplace_backward": true,
71
+ "max_length": 20,
72
+ "max_position_embeddings": 32768,
73
+ "max_window_layers": 36,
74
+ "min_length": 0,
75
+ "mlp_bias": false,
76
+ "model_type": "llama",
77
+ "no_repeat_ngram_size": 0,
78
+ "num_attention_heads": 16,
79
+ "num_beam_groups": 1,
80
+ "num_beams": 1,
81
+ "num_hidden_layers": 27,
82
+ "num_key_value_heads": 2,
83
+ "num_return_sequences": 1,
84
+ "output_attentions": false,
85
+ "output_hidden_states": false,
86
+ "output_scores": false,
87
+ "pad_token_id": 2,
88
+ "prefix": null,
89
+ "pretraining_tp": 1,
90
+ "problem_type": null,
91
+ "pruned_heads": {},
92
+ "remove_invalid_values": false,
93
+ "repetition_penalty": 1.0,
94
+ "return_dict": true,
95
+ "return_dict_in_generate": false,
96
+ "rms_norm_eps": 1e-06,
97
+ "rope_scaling": null,
98
+ "rope_theta": 1300,
99
+ "sep_token_id": null,
100
+ "sliding_window": null,
101
+ "sp_split_type": "equal",
102
+ "suppress_tokens": null,
103
+ "task_specific_params": null,
104
+ "temperature": 1.0,
105
+ "tf_legacy_loss": false,
106
+ "tie_encoder_decoder": false,
107
+ "tie_word_embeddings": false,
108
+ "tokenizer_class": null,
109
+ "top_k": 50,
110
+ "top_p": 1.0,
111
+ "torch_dtype": "float32",
112
+ "torchscript": false,
113
+ "tp_group": null,
114
+ "tp_size": 1,
115
+ "typical_p": 1.0,
116
+ "unk_token_id": 0,
117
+ "use_bfloat16": false,
118
+ "use_cache": true,
119
+ "use_mrope": false,
120
+ "use_sliding_window": false,
121
+ "varlen_input": false,
122
+ "vocab_size": 128256
123
+ },
124
+ "torch_dtype": "float32",
125
+ "transformers_version": "4.40.0.dev0"
126
+ }
configuration_gigarembed.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Literal
2
+ from transformers import AutoConfig
3
+ from transformers.configuration_utils import PretrainedConfig
4
+ from transformers.models.auto import CONFIG_MAPPING
5
+ from transformers.models.llama import LlamaConfig
6
+
7
# ``model_type`` identifiers for the config classes defined in this module.
# The same strings are used as keys when the classes are registered with
# ``AutoConfig`` at the bottom of the file.
GIGAREMBED_TYPE = "gigarembed"
LATENT_ATTENTION_TYPE = "latent_attention"
BIDIR_LLAMA_TYPE = "bidir_llama"
10
+
11
class GigarEmbedConfig(PretrainedConfig):
    """Top-level configuration for the ``gigarembed`` model type.

    Holds two sub-configurations (``latent_attention_config`` and
    ``text_config``) plus a handful of flags that are stored verbatim on the
    instance.
    """

    model_type = GIGAREMBED_TYPE
    is_composition = False

    def __init__(
        self,
        latent_attention_config=None,
        text_config=None,
        padding_side: Literal["right", "left"] = "right",
        add_pad_token: bool = True,
        is_mask_instruction: bool = True,
        add_eos: bool = True,
        mask_type: str = "b",
        **kwargs,
    ):
        """Build the configuration.

        Args:
            latent_attention_config: A ready config object, a ``dict`` of
                keyword arguments for one, or ``None``. A dict is turned into
                the config class registered in ``CONFIG_MAPPING`` under its
                ``"model_type"`` key (``LATENT_ATTENTION_TYPE`` when the key
                is absent); ``None`` yields that class with its defaults.
            text_config: A ready config object, a ``dict`` of keyword
                arguments for one (``"model_type"`` defaults to ``"llama"``),
                or ``None``, which is stored as ``None``.
            padding_side: Stored as ``self.padding_side``.
            add_pad_token: Stored as ``self.add_pad_token``.
            is_mask_instruction: Stored as ``self.is_mask_instruction``.
            add_eos: Stored as ``self.add_eos``.
            mask_type: Stored as ``self.mask_type``.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``. A
                ``hidden_size`` entry, if present, is also stored as
                ``self.hidden_size``; otherwise 2560 is used.

        Raises:
            KeyError: If a sub-config dict names a ``model_type`` that is not
                present in ``CONFIG_MAPPING``.

        NOTE(review): the five flags are only stored here; how they are
        interpreted is up to the modeling code, which is not visible from
        this module.
        """
        if isinstance(latent_attention_config, dict):
            # Work on a shallow copy so the caller's dict is not modified
            # when the default model type is filled in.
            latent_kwargs = dict(latent_attention_config)
            latent_kwargs.setdefault("model_type", LATENT_ATTENTION_TYPE)
            latent_cls = CONFIG_MAPPING[latent_kwargs["model_type"]]
            latent_attention_config = latent_cls(**latent_kwargs)
        elif latent_attention_config is None:
            latent_attention_config = CONFIG_MAPPING[LATENT_ATTENTION_TYPE]()
        # Any other value (e.g. an already-built config object) is kept as is.
        self.latent_attention_config = latent_attention_config

        if isinstance(text_config, dict):
            # Same copy-then-default treatment as above.
            text_kwargs = dict(text_config)
            text_kwargs.setdefault("model_type", "llama")
            text_config = CONFIG_MAPPING[text_kwargs["model_type"]](**text_kwargs)
        # ``None`` and already-built config objects pass through unchanged.
        self.text_config = text_config

        self.padding_side = padding_side
        self.is_mask_instruction = is_mask_instruction
        self.add_pad_token = add_pad_token
        self.add_eos = add_eos
        self.mask_type = mask_type
        # ``hidden_size`` intentionally stays in ``kwargs`` so that it is
        # still forwarded to the base class, exactly as before.
        self.hidden_size = kwargs.get("hidden_size", 2560)

        super().__init__(**kwargs)
54
+
55
+
56
class LatentAttentionConfig(PretrainedConfig):
    """Configuration registered under the ``latent_attention`` model type.

    Stores six hyper-parameters as plain instance attributes.

    NOTE(review): ``__init__`` does not call ``PretrainedConfig.__init__`` and
    silently discards ``**kwargs``. That is the existing behavior and is kept
    here; confirm it is intentional before relying on any base-class
    attribute of instances of this class.
    """

    model_type = LATENT_ATTENTION_TYPE
    is_composition = False
    _name_or_path = "latent_attention"

    def __init__(
        self,
        num_latents_value: int = 512,
        num_cross_heads: int = 8,
        output_normalize: bool = True,
        hidden_dim: int = 2560,
        latent_dim: int = 2560,
        cross_dim_head: int = 2560,
        **kwargs,
    ):
        """Store the given hyper-parameters on the instance.

        Args:
            num_latents_value: Stored as ``self.num_latents_value``
                (presumably the number of latent vectors -- TODO confirm).
            num_cross_heads: Stored as ``self.num_cross_heads``.
            output_normalize: Stored as ``self.output_normalize``.
            hidden_dim: Stored as ``self.hidden_dim``.
            latent_dim: Stored as ``self.latent_dim``.
            cross_dim_head: Stored as ``self.cross_dim_head``.
            **kwargs: Accepted for signature compatibility and ignored.
        """
        # Assigned in the same order as the parameters are declared.
        hyper_parameters = (
            ("num_latents_value", num_latents_value),
            ("num_cross_heads", num_cross_heads),
            ("output_normalize", output_normalize),
            ("hidden_dim", hidden_dim),
            ("latent_dim", latent_dim),
            ("cross_dim_head", cross_dim_head),
        )
        for attr_name, attr_value in hyper_parameters:
            setattr(self, attr_name, attr_value)
77
+
78
+
79
class BidirectionalLlamaConfig(LlamaConfig):
    """``LlamaConfig`` subclass registered under the ``bidir_llama`` type.

    Adds no fields of its own; it only overrides the two class attributes
    below.
    """

    model_type = BIDIR_LLAMA_TYPE
    keys_to_ignore_at_inference = ["past_key_values"]
82
+
83
# Map each custom ``model_type`` string to its config class in AutoConfig.
AutoConfig.register(GIGAREMBED_TYPE, GigarEmbedConfig)
AutoConfig.register(LATENT_ATTENTION_TYPE, LatentAttentionConfig)
AutoConfig.register(BIDIR_LLAMA_TYPE, BidirectionalLlamaConfig)

# Mark the classes as custom auto classes (the ``auto_map`` entry in
# config.json refers to ``configuration_gigarembed.GigarEmbedConfig``).
GigarEmbedConfig.register_for_auto_class()
LatentAttentionConfig.register_for_auto_class()
BidirectionalLlamaConfig.register_for_auto_class()
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f5bdcf6ab584d8e3fa1e53ad3061a203125be81e043b11e7c867e04670e8aa7
3
+ size 4913926592
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29f27ca086d141ee606037884f8bb93176a61032230ba1ca92ba0c2fe7615cb2
3
+ size 4932780264
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60c4fbeddfae2f00234d2d9e6646cd7a11914454eaf46e278f85bfb088d1417f
3
+ size 270557856
model.safetensors.index.json ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 10117234688
4
+ },
5
+ "weight_map": {
6
+ "latent_attention_model.cross_attend_blocks.0.fn.to_kv.weight": "model-00001-of-00003.safetensors",
7
+ "latent_attention_model.cross_attend_blocks.0.fn.to_out.weight": "model-00001-of-00003.safetensors",
8
+ "latent_attention_model.cross_attend_blocks.0.fn.to_q.weight": "model-00001-of-00003.safetensors",
9
+ "latent_attention_model.cross_attend_blocks.0.norm.bias": "model-00001-of-00003.safetensors",
10
+ "latent_attention_model.cross_attend_blocks.0.norm.weight": "model-00001-of-00003.safetensors",
11
+ "latent_attention_model.cross_attend_blocks.0.norm_context.bias": "model-00001-of-00003.safetensors",
12
+ "latent_attention_model.cross_attend_blocks.0.norm_context.weight": "model-00001-of-00003.safetensors",
13
+ "latent_attention_model.cross_attend_blocks.1.fn.net.0.bias": "model-00001-of-00003.safetensors",
14
+ "latent_attention_model.cross_attend_blocks.1.fn.net.0.weight": "model-00001-of-00003.safetensors",
15
+ "latent_attention_model.cross_attend_blocks.1.fn.net.2.bias": "model-00001-of-00003.safetensors",
16
+ "latent_attention_model.cross_attend_blocks.1.fn.net.2.weight": "model-00001-of-00003.safetensors",
17
+ "latent_attention_model.cross_attend_blocks.1.norm.bias": "model-00001-of-00003.safetensors",
18
+ "latent_attention_model.cross_attend_blocks.1.norm.weight": "model-00001-of-00003.safetensors",
19
+ "latent_attention_model.latents": "model-00001-of-00003.safetensors",
20
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
27
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
28
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
31
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
36
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
37
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
38
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
39
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
45
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
46
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
47
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
48
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
117
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
118
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
119
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
120
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
121
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
122
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
123
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
124
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
125
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
130
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
131
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
132
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
133
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
134
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
135
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
136
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
137
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
138
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
144
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
145
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
146
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
147
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
148
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
153
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
154
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
155
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
156
+ "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors",
157
+ "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
158
+ "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
159
+ "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
160
+ "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
161
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
162
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
163
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
164
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
165
+ "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
166
+ "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
167
+ "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
168
+ "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
169
+ "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
170
+ "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
171
+ "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
172
+ "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
173
+ "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
174
+ "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors",
175
+ "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
176
+ "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
177
+ "model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
178
+ "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
179
+ "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
180
+ "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
181
+ "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
182
+ "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
183
+ "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
184
+ "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
185
+ "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
186
+ "model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
187
+ "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
188
+ "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
189
+ "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
190
+ "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
191
+ "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
192
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
198
+ "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
199
+ "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
200
+ "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
201
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
202
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
203
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
204
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
205
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
206
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
207
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
208
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
209
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
210
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
211
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
212
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
213
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
214
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
215
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
225
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
226
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
227
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
228
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
229
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
230
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
231
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
232
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
233
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
234
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
235
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
236
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
237
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
238
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
239
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
240
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
241
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
242
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.norm.weight": "model-00003-of-00003.safetensors"
265
+ }
266
+ }
modeling_gigarembed.py ADDED
@@ -0,0 +1,448 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union, Dict, Mapping, Optional, Tuple, TypedDict
2
+ import torch
3
+ import os
4
+ import json
5
+ import numpy as np
6
+ from functools import partial
7
+ from contextlib import nullcontext
8
+ from transformers import AutoModel, PreTrainedTokenizerFast, BatchEncoding, DataCollatorWithPadding
9
+ from transformers.modeling_utils import PreTrainedModel
10
+ from transformers.models.auto import AutoTokenizer
11
+ from transformers.models.llama.modeling_llama import LLAMA_INPUTS_DOCSTRING
12
+ from transformers.modeling_outputs import BaseModelOutputWithPast
13
+ from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
14
+ from transformers import LlamaModel, LlamaConfig
15
+ from transformers.cache_utils import Cache, DynamicCache
16
+ from transformers.utils import (
17
+ add_start_docstrings_to_model_forward,
18
+ logging,
19
+ )
20
+ from einops import rearrange, repeat
21
+ from tqdm.auto import tqdm
22
+ from datasets import Dataset
23
+ from torch.utils.data import DataLoader
24
+ from .configuration_gigarembed import GigarEmbedConfig, LatentAttentionConfig, BidirectionalLlamaConfig
25
+
26
# Module-level logger; BidirectionalLlamaModel.forward uses it to warn when
# `use_cache=True` is combined with gradient checkpointing.
logger = logging.get_logger(__name__)
27
+
28
class GigarEmbedFeatures(TypedDict):
    """Keyword arguments produced by ``GigarEmbedModel.prepare_kwargs_from_batch``.

    The keys match the parameter names of ``GigarEmbedModel.forward`` so the
    dict can be splatted straight into the model call.
    """

    # Token ids of the tokenized prompts (cast to ``long`` by the producer).
    input_ids: torch.Tensor
    # Attention mask exactly as returned by the tokenizer.
    attention_mask: torch.Tensor
    # Clone of the attention mask in which the leading instruction positions
    # may have been zeroed out.
    pool_mask: torch.Tensor
32
+
33
class BidirectionalLlamaModel(LlamaModel):
    """Llama backbone whose self-attention layers are flagged as non-causal.

    The constructor clears ``is_causal`` on every layer's attention module and
    pins the attention implementation to ``"eager"``. ``forward`` builds a
    padding-only attention mask instead of a causal one.
    """
    config_class = BidirectionalLlamaConfig

    def __init__(self, config: LlamaConfig):
        super().__init__(config)
        # Turn off the causal flag on every decoder layer's attention module.
        for layer in self.layers:
            layer.self_attn.is_causal = False
        # Select the eager branch of the mask-preparation logic in `forward`.
        self._attn_implementation = "eager"

    @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        """Run the decoder stack and return the normalized hidden states.

        Exactly one of ``input_ids`` / ``inputs_embeds`` must be given,
        otherwise a ``ValueError`` is raised. Flags left as ``None`` fall back
        to the corresponding values on ``self.config``.

        Returns a ``BaseModelOutputWithPast`` when ``return_dict`` is true,
        otherwise a tuple of the non-``None`` entries among
        (last hidden state, cache, all hidden states, all attentions).
        """
        # Resolve unspecified flags from the model config.
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (
            output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
        )
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # retrieve input_ids and inputs_embeds
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
        elif input_ids is not None:
            batch_size, seq_length = input_ids.shape
        elif inputs_embeds is not None:
            batch_size, seq_length, _ = inputs_embeds.shape
        else:
            raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")

        # Caching is switched off while training with gradient checkpointing.
        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        past_key_values_length = 0

        if use_cache:
            # Anything that is not a `Cache` instance is treated as the legacy
            # format and wrapped in a DynamicCache; `use_legacy_cache` is only
            # defined (and only read again) when `use_cache` is true.
            use_legacy_cache = not isinstance(past_key_values, Cache)
            if use_legacy_cache:
                past_key_values = DynamicCache.from_legacy_cache(past_key_values)
            past_key_values_length = past_key_values.get_usable_length(seq_length)

        if position_ids is None:
            # Default positions continue after whatever is already cached.
            device = input_ids.device if input_ids is not None else inputs_embeds.device
            position_ids = torch.arange(
                past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
            )
            position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
        else:
            position_ids = position_ids.view(-1, seq_length).long()

        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids)

        # With flash-attention and caching, reject batches whose last column
        # contains padding (i.e. right-padded batches).
        if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
            is_padding_right = attention_mask[:, -1].sum().item() != batch_size
            if is_padding_right:
                raise ValueError(
                    "You are attempting to perform batched generation with padding_side='right'"
                    " this may lead to unexpected behaviour for Flash Attention version of Llama. Make sure to "
                    " call `tokenizer.padding_side = 'left'` before tokenizing the input. "
                )

        # NOTE: __init__ sets `_attn_implementation` to "eager", so unless it is
        # changed afterwards only the final `else` branch below is taken.
        if self._attn_implementation == "flash_attention_2":
            # 2d mask is passed through the layers
            attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
        elif self._attn_implementation == "sdpa" and not output_attentions:
            # SDPA path (skipped when output_attentions=True, which then falls
            # through to the manual 4D-mask branch below).
            # NOTE(review): the helper name suggests a padding-only (non-causal)
            # 4D mask - confirm against the installed transformers version.
            attention_mask = _prepare_4d_attention_mask_for_sdpa(
                attention_mask, inputs_embeds.dtype
            )
        else:
            # 4d mask is passed through the layers
            attention_mask = _prepare_4d_attention_mask(
                attention_mask, inputs_embeds.dtype,
            )

        hidden_states = inputs_embeds

        # create position embeddings to be shared across the decoder layers
        position_embeddings = self.rotary_emb(hidden_states, position_ids)

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = None

        for decoder_layer in self.layers:
            # Record the input of each layer; the final output is added after the loop.
            if output_hidden_states:
                all_hidden_states += (hidden_states,)

            if self.gradient_checkpointing and self.training:
                layer_outputs = self._gradient_checkpointing_func(
                    decoder_layer.__call__,
                    hidden_states,
                    attention_mask,
                    position_ids,
                    past_key_values,
                    output_attentions,
                    use_cache,
                    position_embeddings=position_embeddings
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=attention_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    position_embeddings=position_embeddings
                )

            hidden_states = layer_outputs[0]

            # The cache sits after the attention weights when those are returned.
            if use_cache:
                next_decoder_cache = layer_outputs[2 if output_attentions else 1]

            if output_attentions:
                all_self_attns += (layer_outputs[1],)

        hidden_states = self.norm(hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states,)

        next_cache = None
        if use_cache:
            # Hand the cache back in the same format the caller supplied.
            next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache

        if not return_dict:
            return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )
186
+
187
+ def _move_to_device(maybe_tensor, device: torch.device):
188
+ if torch.is_tensor(maybe_tensor):
189
+ return maybe_tensor.to(device, non_blocking=device.type == "cuda")
190
+ elif isinstance(maybe_tensor, dict):
191
+ return {key: _move_to_device(value, device) for key, value in maybe_tensor.items()}
192
+ elif isinstance(maybe_tensor, list):
193
+ return [_move_to_device(x, device) for x in maybe_tensor]
194
+ elif isinstance(maybe_tensor, tuple):
195
+ return tuple([_move_to_device(x, device) for x in maybe_tensor])
196
+ elif isinstance(maybe_tensor, Mapping):
197
+ return type(maybe_tensor)({k: _move_to_device(v, device) for k, v in maybe_tensor.items()})
198
+ else:
199
+ return maybe_tensor
200
+
201
def move_to_device(sample, device: torch.device):
    """Move a (possibly nested) sample to ``device``.

    When the target is the CPU the sample is handed back as-is, without any
    copying. Otherwise an empty sample yields an empty dict, and everything
    else is converted recursively by ``_move_to_device``.
    """
    target_is_cpu = device.type == "cpu"
    if target_is_cpu:
        return sample

    # Empty containers short-circuit to a fresh empty dict.
    return _move_to_device(sample, device) if len(sample) != 0 else {}
208
+
209
+
210
def input_transform_func(
    tokenizer: "PreTrainedTokenizerFast",
    examples: Dict[str, List],
    max_length: int,
    instruction: str,
) -> "BatchEncoding":
    """Prefix every input text with ``instruction`` and tokenize the batch.

    Args:
        tokenizer: Callable tokenizer; invoked once on the whole batch.
        examples: Batch dict whose ``'input_texts'`` entry holds the raw texts.
            It is read only; the caller's dict is left untouched.
        max_length: Truncation length forwarded to the tokenizer.
        instruction: String prepended verbatim to each text (may be empty).

    Returns:
        Whatever the tokenizer returns for the prefixed texts, requested as
        padded PyTorch tensors without token type ids.
    """
    # Build the prefixed texts locally instead of overwriting
    # examples['input_texts'], so the caller's batch is not mutated.
    prefixed_texts = [instruction + input_example for input_example in examples['input_texts']]
    batch_dict = tokenizer(
        prefixed_texts,
        max_length=max_length,
        padding=True,
        return_token_type_ids=False,
        return_tensors="pt",
        truncation=True)
    return batch_dict
225
+
226
+
227
class PreNorm(torch.nn.Module):
    """Placeholder wrapper that behaves as an identity layer.

    The constructor arguments (``dim``, ``fn``, ``context_dim``) are accepted
    but not stored, so the wrapped ``fn`` is neither registered as a submodule
    nor ever called.
    """

    def __init__(self, dim, fn, context_dim = None):
        # TODO remove this layer, we don't use it
        super().__init__()

    def forward(self, x, **kwargs):
        """Return ``x`` itself; extra keyword arguments are ignored."""
        return x
234
+
235
class GEGLU(torch.nn.Module):
    """GELU-gated linear unit: halves the last dimension of its input."""

    def forward(self, x):
        """Split ``x`` in two along the last axis and gate the first half.

        The second half is passed through GELU and multiplied element-wise
        with the first half.
        """
        value, gate = torch.chunk(x, 2, dim=-1)
        activated_gate = torch.nn.functional.gelu(gate)
        return value * activated_gate
239
+
240
class FeedForward(torch.nn.Module):
    """Two-layer feed-forward block with a GEGLU non-linearity in between.

    The first projection widens ``dim`` to ``2 * dim * mult`` features, GEGLU
    halves that to ``dim * mult``, and the second projection maps back to
    ``dim``.
    """

    def __init__(self, dim, mult = 4):
        super().__init__()
        hidden = dim * mult
        # The linear layers are created in the same order as they are applied.
        expand = torch.nn.Linear(dim, 2 * hidden)
        contract = torch.nn.Linear(hidden, dim)
        self.net = torch.nn.Sequential(expand, GEGLU(), contract)

    def forward(self, x):
        """Apply the expand -> GEGLU -> contract pipeline to ``x``."""
        return self.net(x)
251
+
252
def exists(val):
    """Tell whether ``val`` is anything other than ``None``."""
    return not (val is None)
254
+
255
def default(val, d):
    """Return ``val`` unless it is ``None``, in which case return the fallback ``d``."""
    if val is None:
        return d
    return val
257
+
258
+
259
class Attention(torch.nn.Module):
    """Multi-head attention of ``x`` (queries) over ``context`` (keys/values).

    Args:
        query_dim: Feature size of the query input and of the output.
        context_dim: Feature size of the context; defaults to ``query_dim``.
        heads: Number of attention heads.
        dim_head: Feature size of each head.
    """

    def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
        super().__init__()
        inner_dim = dim_head * heads
        if context_dim is None:
            context_dim = query_dim
        self.scale = dim_head ** -0.5
        self.heads = heads

        self.to_q = torch.nn.Linear(query_dim, inner_dim, bias = False)
        self.to_kv = torch.nn.Linear(context_dim, inner_dim * 2, bias = False)
        self.to_out = torch.nn.Linear(inner_dim, query_dim, bias = False)

    def _split_heads(self, t):
        """Reshape ``(b, n, h * d)`` into ``(b * h, n, d)`` with the batch as the outer index."""
        b, n, width = t.shape
        t = t.reshape(b, n, self.heads, width // self.heads)
        return t.permute(0, 2, 1, 3).reshape(b * self.heads, n, width // self.heads)

    def _merge_heads(self, t):
        """Inverse of ``_split_heads``: ``(b * h, n, d)`` back to ``(b, n, h * d)``."""
        bh, n, d = t.shape
        t = t.reshape(bh // self.heads, self.heads, n, d)
        return t.permute(0, 2, 1, 3).reshape(bh // self.heads, n, self.heads * d)

    def forward(self, x, context = None, mask = None):
        """Attend from ``x`` to ``context``.

        Args:
            x: Query input of shape ``(batch, n, query_dim)``.
            context: Key/value input of shape ``(batch, m, context_dim)``;
                ``x`` itself is used when omitted.
            mask: Optional ``(batch, n)`` mask over *query* positions. Rows
                whose mask entry is zero have all their scores replaced by the
                dtype minimum before the softmax. ``None`` disables masking.

        Returns:
            Tensor of shape ``(batch, n, query_dim)``.
        """
        q = self.to_q(x)
        if context is None:
            context = x
        k, v = self.to_kv(context).chunk(2, dim = -1)
        q, k, v = (self._split_heads(t) for t in (q, k, v))

        # NOTE(review): scores are divided by ``dim_head ** -0.5`` (i.e. scaled
        # up by sqrt(dim_head)); conventional attention multiplies by it.
        # Kept as-is to preserve existing numerics - confirm the intent.
        attn_weights = torch.matmul(q, k.transpose(-1, -2)) / self.scale

        if mask is not None:
            mask_value = torch.finfo(attn_weights.dtype).min
            # The head-split rows are ordered sample-major (all heads of sample
            # 0, then sample 1, ...), so each sample's mask row has to be
            # repeated consecutively to stay aligned with its own heads.
            padding_mask = mask[:, :, None].bool().repeat_interleave(self.heads, dim=0)
            attn_weights = attn_weights.masked_fill(~padding_mask, mask_value)

        attn_weights = torch.nn.functional.softmax(attn_weights, dim=-1)

        out = torch.matmul(attn_weights, v)
        out = self._merge_heads(out)
        return self.to_out(out)
291
+
292
+
293
class LatentAttentionModel(PreTrainedModel):
    """Pooling head that turns token hidden states into one vector per sequence.

    It owns a learnable ``latents`` parameter and two ``PreNorm``-wrapped
    blocks (cross-attention and feed-forward), then mean-pools over the
    positions selected by the attention mask and optionally L2-normalizes.

    NOTE: ``PreNorm`` in this file is an identity layer that discards the
    module it wraps, so while that holds each "block" below returns its input
    unchanged and the latents do not influence the output.
    """
    config_class = LatentAttentionConfig

    def __init__(self, config: LatentAttentionConfig):
        super().__init__(config)
        ## cross-attention block
        num_latents = config.num_latents_value
        latent_dim = config.latent_dim
        cross_heads = config.num_cross_heads
        cross_dim_head = config.cross_dim_head
        dim = config.hidden_dim
        # init latent_attention and latents
        self.cross_attend_blocks = torch.nn.ModuleList([
            PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head),
                    context_dim = dim),
            PreNorm(latent_dim, FeedForward(latent_dim)),
        ])
        self.output_normalize = config.output_normalize
        self.register_parameter("latents", torch.nn.Parameter(torch.randn(num_latents, latent_dim)))

    def forward(self, hiddens, attention_mask: Optional[torch.Tensor]=None):
        """Pool ``hiddens`` into sentence embeddings.

        Args:
            hiddens: Token hidden states; the first axis is the batch and the
                second is reduced during pooling.
            attention_mask: Optional mask broadcast over the feature axis; when
                given, the output is the mask-weighted mean over axis 1. When
                ``None`` no pooling is applied.

        Returns:
            The pooled (or, without a mask, un-pooled) tensor, L2-normalized
            along the last axis if ``config.output_normalize`` is set.
        """
        # cross-attention block
        cross_attn, cross_ff = self.cross_attend_blocks
        batch = hiddens.shape[0]
        # One copy of the latents per batch element, used as attention context.
        x = repeat(self.latents, 'n d -> b n d', b = batch)
        hiddens = cross_attn(hiddens, context=x, mask=attention_mask) + hiddens
        hiddens = cross_ff(hiddens) + hiddens
        if attention_mask is not None:
            # Masked mean over the sequence axis.
            # NOTE(review): a row whose mask is entirely zero divides by zero.
            s = torch.sum(hiddens * attention_mask.unsqueeze(-1).float(), dim=1)
            d = attention_mask.sum(dim=1, keepdim=True).float()
            hiddens = s / d
            if self.output_normalize:
                hiddens = torch.nn.functional.normalize(hiddens, p=2, dim=-1)
        return hiddens
324
+
325
class GigarEmbedModel(PreTrainedModel):
    """Embedding model: a text backbone followed by a latent-attention pooling head.

    The backbone and the pooling head are both built through
    ``AutoModel.from_config`` from the sub-configs of ``GigarEmbedConfig``. A
    tokenizer is loaded alongside so that ``encode`` can work on raw strings.
    """
    config_class = GigarEmbedConfig
    _no_split_modules = ["LlamaDecoderLayer", "LatentAttentionModel"]

    def __init__(self, config: GigarEmbedConfig):
        super().__init__(config)
        # The pooling head is created first and kept in float32.
        self.latent_attention_model = AutoModel.from_config(config.latent_attention_config).float()
        has_text_config = config.text_config is not None
        self.model = AutoModel.from_config(
            config.text_config,
        ) if has_text_config else None
        self.tokenizer = AutoTokenizer.from_pretrained(config.text_config._name_or_path) if has_text_config else None
        self.padding_side = config.padding_side
        self.is_mask_instruction = config.is_mask_instruction
        self.add_eos = config.add_eos
        self.mask_type = config.mask_type
        if config.add_pad_token and self.tokenizer is not None:
            self.add_pad_token()

        self.latent_attention_model.apply(self._init_weights)

    def _init_weights(self, module):
        """Xavier-normal initialization for the weights of ``Linear`` modules."""
        if isinstance(module, torch.nn.Linear):
            torch.nn.init.xavier_normal_(module.weight)

    def add_pad_token(self):
        """Use token id 0 for padding and apply the configured padding side."""
        self.tokenizer.pad_token_id = 0
        self.tokenizer.padding_side = self.padding_side

    def _instruction_token_count(self, instruction: str) -> int:
        """Number of leading positions to mask for ``instruction``.

        Non-zero only for right padding with instruction masking enabled and a
        non-empty instruction; the count is the length of
        ``tokenizer.tokenize(instruction)``.
        """
        if self.padding_side == "right" and self.is_mask_instruction and len(instruction) > 0:
            return len(self.tokenizer.tokenize(instruction))
        return 0

    def prepare_kwargs_from_batch(self, batch_dict: dict, instruction_lens: int, device: torch.device):
        """Move a tokenized batch to ``device`` and build the ``forward`` kwargs.

        Args:
            batch_dict: Tokenizer output with ``'input_ids'`` and ``'attention_mask'``.
            instruction_lens: Number of leading positions to zero in the pooling mask.
            device: Target device for the tensors.

        Returns:
            Dict with ``input_ids`` (as ``long``), the original ``attention_mask``
            and ``pool_mask`` (a clone with the instruction prefix zeroed when
            masking applies).
        """
        batch_dict = move_to_device(batch_dict, device)
        attention_mask = batch_dict['attention_mask'].clone() if 'attention_mask' in batch_dict else None
        if (attention_mask is not None and
            self.padding_side == "right" and
            self.is_mask_instruction and
            instruction_lens > 0):
            # Mask out the instruction tokens for mean-pooling
            attention_mask[:, :instruction_lens] = 0
        features: GigarEmbedFeatures = {
            # Cast directly; wrapping an existing tensor in torch.tensor()
            # copy-constructs it and triggers a PyTorch UserWarning.
            'input_ids': batch_dict['input_ids'].long(),
            'attention_mask': batch_dict['attention_mask'],
            'pool_mask': attention_mask,
        }
        return features

    @torch.no_grad()
    def _do_encode(self,
        prompts: List[str],
        batch_size: int=1,
        instruction: str="",
        max_length: int=4096,
        num_workers: int=32,
        **kwargs
    ) -> Union[np.ndarray, torch.FloatTensor]:
        """Encode ``prompts`` in mini-batches through a ``DataLoader``.

        Args:
            prompts: Raw texts to embed.
            batch_size: Number of prompts per forward pass.
            instruction: Prefix prepended to every prompt before tokenization.
            max_length: Truncation length for the tokenizer.
            num_workers: Worker processes for the ``DataLoader``.
            **kwargs: ``return_numpy=True`` converts the result to a NumPy array.

        Returns:
            All embeddings concatenated along the first axis, as a tensor or,
            if requested, a NumPy array.
        """
        dataset: Dataset = Dataset.from_dict({'input_texts': prompts})
        # Tokenization happens lazily whenever the loader fetches items.
        dataset.set_transform(partial(input_transform_func,
                                      self.tokenizer,
                                      max_length=max_length,
                                      instruction=instruction))

        data_collator = DataCollatorWithPadding(self.tokenizer)
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False,
            num_workers=num_workers,
            collate_fn=data_collator,
            pin_memory=True)

        instruction_lens = self._instruction_token_count(instruction)

        encoded_embeds = []
        device = next(self.model.parameters()).device
        for batch_dict in tqdm(data_loader, desc='encoding', mininterval=10):
            features = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
            embeds = self(**features)["sentence_embeddings"].squeeze(1)
            encoded_embeds.append(embeds)
        encoded_embeds = torch.cat(encoded_embeds, dim=0)
        if kwargs.get("return_numpy"):
            encoded_embeds = encoded_embeds.cpu().detach().numpy()
        return encoded_embeds

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, pool_mask: Optional[torch.Tensor]=None,
                return_dict: bool=True, **kwargs):
        """Embed a tokenized batch.

        Runs the text backbone and feeds its last hidden state, together with
        ``attention_mask``, to the latent-attention pooling head.

        Returns:
            ``{"sentence_embeddings": embeds}``, or ``(embeds,)`` when
            ``return_dict`` is false.
        """
        outputs = self.model(input_ids=input_ids, attention_mask=attention_mask, **kwargs)

        # NOTE(review): ``pool_mask`` is accepted but unused - pooling runs with
        # the full ``attention_mask``, so the instruction masking prepared in
        # ``prepare_kwargs_from_batch`` has no effect here. Confirm whether
        # this is intentional before changing it (it alters the embeddings).
        embeds = self.latent_attention_model(
            outputs.last_hidden_state,
            attention_mask,
        )
        if not return_dict:
            return (embeds,)
        return {"sentence_embeddings": embeds}


    @torch.no_grad()
    def encode(self, prompts: List[str], instruction: str="", max_length: int=4096, **kwargs):
        """Embed ``prompts`` in a single forward pass.

        Args:
            prompts: Raw texts to embed.
            instruction: Prefix prepended to every prompt before tokenization.
            max_length: Truncation length for the tokenizer.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            Tensor of sentence embeddings, one row per prompt.
        """
        instruction_lens = self._instruction_token_count(instruction)

        device = next(self.model.parameters()).device
        batch_dict = input_transform_func(self.tokenizer,
                                          {"input_texts": list(prompts)},
                                          max_length=max_length,
                                          instruction=instruction)

        features: GigarEmbedFeatures = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
        return self(**features)["sentence_embeddings"].squeeze(1)
438
+
439
+
440
## AutoModel Register
# Map each config class to its model class so that AutoModel.from_config(...)
# (used in GigarEmbedModel.__init__) can build these custom models.
AutoModel.register(GigarEmbedConfig, GigarEmbedModel)
AutoModel.register(LatentAttentionConfig, LatentAttentionModel)
AutoModel.register(BidirectionalLlamaConfig, BidirectionalLlamaModel)

## Register for auto class
# Mark the classes as custom "AutoModel" implementations.
# NOTE(review): presumably this makes save_pretrained record them for
# trust_remote_code loading - confirm against the transformers docs.
GigarEmbedModel.register_for_auto_class("AutoModel")
LatentAttentionModel.register_for_auto_class("AutoModel")
BidirectionalLlamaModel.register_for_auto_class("AutoModel")
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "unk_token": "<unk>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,2082 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "128000": {
28
+ "content": "<|gigatoken_1|>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128001": {
36
+ "content": "<|gigatoken_2|>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "128002": {
44
+ "content": "<|gigatoken_3|>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "128003": {
52
+ "content": "<|gigatoken_4|>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "128004": {
60
+ "content": "<|gigatoken_5|>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "128005": {
68
+ "content": "<|gigatoken_6|>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "128006": {
76
+ "content": "<|gigatoken_7|>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "128007": {
84
+ "content": "<|gigatoken_8|>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "128008": {
92
+ "content": "<|gigatoken_9|>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "128009": {
100
+ "content": "<|gigatoken_10|>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "128010": {
108
+ "content": "<|gigatoken_11|>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "128011": {
116
+ "content": "<|gigatoken_12|>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "128012": {
124
+ "content": "<|gigatoken_13|>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "128013": {
132
+ "content": "<|gigatoken_14|>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "128014": {
140
+ "content": "<|gigatoken_15|>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "128015": {
148
+ "content": "<|gigatoken_16|>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "128016": {
156
+ "content": "<|gigatoken_17|>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "128017": {
164
+ "content": "<|gigatoken_18|>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "128018": {
172
+ "content": "<|gigatoken_19|>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "128019": {
180
+ "content": "<|gigatoken_20|>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "128020": {
188
+ "content": "<|gigatoken_21|>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "128021": {
196
+ "content": "<|gigatoken_22|>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "128022": {
204
+ "content": "<|gigatoken_23|>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "128023": {
212
+ "content": "<|gigatoken_24|>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "128024": {
220
+ "content": "<|gigatoken_25|>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "128025": {
228
+ "content": "<|gigatoken_26|>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "128026": {
236
+ "content": "<|gigatoken_27|>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "128027": {
244
+ "content": "<|gigatoken_28|>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "128028": {
252
+ "content": "<|gigatoken_29|>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "128029": {
260
+ "content": "<|gigatoken_30|>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "128030": {
268
+ "content": "<|gigatoken_31|>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "128031": {
276
+ "content": "<|gigatoken_32|>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "128032": {
284
+ "content": "<|gigatoken_33|>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "128033": {
292
+ "content": "<|gigatoken_34|>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "128034": {
300
+ "content": "<|gigatoken_35|>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "128035": {
308
+ "content": "<|gigatoken_36|>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "128036": {
316
+ "content": "<|gigatoken_37|>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "128037": {
324
+ "content": "<|gigatoken_38|>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "128038": {
332
+ "content": "<|gigatoken_39|>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "128039": {
340
+ "content": "<|gigatoken_40|>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "128040": {
348
+ "content": "<|gigatoken_41|>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "128041": {
356
+ "content": "<|gigatoken_42|>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "128042": {
364
+ "content": "<|gigatoken_43|>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "128043": {
372
+ "content": "<|gigatoken_44|>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "128044": {
380
+ "content": "<|gigatoken_45|>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "128045": {
388
+ "content": "<|gigatoken_46|>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "128046": {
396
+ "content": "<|gigatoken_47|>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "128047": {
404
+ "content": "<|gigatoken_48|>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "128048": {
412
+ "content": "<|gigatoken_49|>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "128049": {
420
+ "content": "<|gigatoken_50|>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "128050": {
428
+ "content": "<|gigatoken_51|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "128051": {
436
+ "content": "<|gigatoken_52|>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "128052": {
444
+ "content": "<|gigatoken_53|>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "128053": {
452
+ "content": "<|gigatoken_54|>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "128054": {
460
+ "content": "<|gigatoken_55|>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "128055": {
468
+ "content": "<|gigatoken_56|>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "128056": {
476
+ "content": "<|gigatoken_57|>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "128057": {
484
+ "content": "<|gigatoken_58|>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "128058": {
492
+ "content": "<|gigatoken_59|>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "128059": {
500
+ "content": "<|gigatoken_60|>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "128060": {
508
+ "content": "<|gigatoken_61|>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "128061": {
516
+ "content": "<|gigatoken_62|>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "128062": {
524
+ "content": "<|gigatoken_63|>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "128063": {
532
+ "content": "<|gigatoken_64|>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "128064": {
540
+ "content": "<|gigatoken_65|>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "128065": {
548
+ "content": "<|gigatoken_66|>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "128066": {
556
+ "content": "<|gigatoken_67|>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "128067": {
564
+ "content": "<|gigatoken_68|>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "128068": {
572
+ "content": "<|gigatoken_69|>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "128069": {
580
+ "content": "<|gigatoken_70|>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "128070": {
588
+ "content": "<|gigatoken_71|>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "128071": {
596
+ "content": "<|gigatoken_72|>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "128072": {
604
+ "content": "<|gigatoken_73|>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "128073": {
612
+ "content": "<|gigatoken_74|>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "128074": {
620
+ "content": "<|gigatoken_75|>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "128075": {
628
+ "content": "<|gigatoken_76|>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "128076": {
636
+ "content": "<|gigatoken_77|>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "128077": {
644
+ "content": "<|gigatoken_78|>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "128078": {
652
+ "content": "<|gigatoken_79|>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "128079": {
660
+ "content": "<|gigatoken_80|>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "128080": {
668
+ "content": "<|gigatoken_81|>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "128081": {
676
+ "content": "<|gigatoken_82|>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "128082": {
684
+ "content": "<|gigatoken_83|>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "128083": {
692
+ "content": "<|gigatoken_84|>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "128084": {
700
+ "content": "<|gigatoken_85|>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "128085": {
708
+ "content": "<|gigatoken_86|>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "128086": {
716
+ "content": "<|gigatoken_87|>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "128087": {
724
+ "content": "<|gigatoken_88|>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "128088": {
732
+ "content": "<|gigatoken_89|>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "128089": {
740
+ "content": "<|gigatoken_90|>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "128090": {
748
+ "content": "<|gigatoken_91|>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "128091": {
756
+ "content": "<|gigatoken_92|>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "128092": {
764
+ "content": "<|gigatoken_93|>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "128093": {
772
+ "content": "<|gigatoken_94|>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "128094": {
780
+ "content": "<|gigatoken_95|>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "128095": {
788
+ "content": "<|gigatoken_96|>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "128096": {
796
+ "content": "<|gigatoken_97|>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "128097": {
804
+ "content": "<|gigatoken_98|>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "128098": {
812
+ "content": "<|gigatoken_99|>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "128099": {
820
+ "content": "<|gigatoken_100|>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "128100": {
828
+ "content": "<|gigatoken_101|>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "128101": {
836
+ "content": "<|gigatoken_102|>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "128102": {
844
+ "content": "<|gigatoken_103|>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "128103": {
852
+ "content": "<|gigatoken_104|>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "128104": {
860
+ "content": "<|gigatoken_105|>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "128105": {
868
+ "content": "<|gigatoken_106|>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "128106": {
876
+ "content": "<|gigatoken_107|>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "128107": {
884
+ "content": "<|gigatoken_108|>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "128108": {
892
+ "content": "<|gigatoken_109|>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "128109": {
900
+ "content": "<|gigatoken_110|>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "128110": {
908
+ "content": "<|gigatoken_111|>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "128111": {
916
+ "content": "<|gigatoken_112|>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "128112": {
924
+ "content": "<|gigatoken_113|>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ },
931
+ "128113": {
932
+ "content": "<|gigatoken_114|>",
933
+ "lstrip": false,
934
+ "normalized": false,
935
+ "rstrip": false,
936
+ "single_word": false,
937
+ "special": true
938
+ },
939
+ "128114": {
940
+ "content": "<|gigatoken_115|>",
941
+ "lstrip": false,
942
+ "normalized": false,
943
+ "rstrip": false,
944
+ "single_word": false,
945
+ "special": true
946
+ },
947
+ "128115": {
948
+ "content": "<|gigatoken_116|>",
949
+ "lstrip": false,
950
+ "normalized": false,
951
+ "rstrip": false,
952
+ "single_word": false,
953
+ "special": true
954
+ },
955
+ "128116": {
956
+ "content": "<|gigatoken_117|>",
957
+ "lstrip": false,
958
+ "normalized": false,
959
+ "rstrip": false,
960
+ "single_word": false,
961
+ "special": true
962
+ },
963
+ "128117": {
964
+ "content": "<|gigatoken_118|>",
965
+ "lstrip": false,
966
+ "normalized": false,
967
+ "rstrip": false,
968
+ "single_word": false,
969
+ "special": true
970
+ },
971
+ "128118": {
972
+ "content": "<|gigatoken_119|>",
973
+ "lstrip": false,
974
+ "normalized": false,
975
+ "rstrip": false,
976
+ "single_word": false,
977
+ "special": true
978
+ },
979
+ "128119": {
980
+ "content": "<|gigatoken_120|>",
981
+ "lstrip": false,
982
+ "normalized": false,
983
+ "rstrip": false,
984
+ "single_word": false,
985
+ "special": true
986
+ },
987
+ "128120": {
988
+ "content": "<|gigatoken_121|>",
989
+ "lstrip": false,
990
+ "normalized": false,
991
+ "rstrip": false,
992
+ "single_word": false,
993
+ "special": true
994
+ },
995
+ "128121": {
996
+ "content": "<|gigatoken_122|>",
997
+ "lstrip": false,
998
+ "normalized": false,
999
+ "rstrip": false,
1000
+ "single_word": false,
1001
+ "special": true
1002
+ },
1003
+ "128122": {
1004
+ "content": "<|gigatoken_123|>",
1005
+ "lstrip": false,
1006
+ "normalized": false,
1007
+ "rstrip": false,
1008
+ "single_word": false,
1009
+ "special": true
1010
+ },
1011
+ "128123": {
1012
+ "content": "<|gigatoken_124|>",
1013
+ "lstrip": false,
1014
+ "normalized": false,
1015
+ "rstrip": false,
1016
+ "single_word": false,
1017
+ "special": true
1018
+ },
1019
+ "128124": {
1020
+ "content": "<|gigatoken_125|>",
1021
+ "lstrip": false,
1022
+ "normalized": false,
1023
+ "rstrip": false,
1024
+ "single_word": false,
1025
+ "special": true
1026
+ },
1027
+ "128125": {
1028
+ "content": "<|gigatoken_126|>",
1029
+ "lstrip": false,
1030
+ "normalized": false,
1031
+ "rstrip": false,
1032
+ "single_word": false,
1033
+ "special": true
1034
+ },
1035
+ "128126": {
1036
+ "content": "<|gigatoken_127|>",
1037
+ "lstrip": false,
1038
+ "normalized": false,
1039
+ "rstrip": false,
1040
+ "single_word": false,
1041
+ "special": true
1042
+ },
1043
+ "128127": {
1044
+ "content": "<|gigatoken_128|>",
1045
+ "lstrip": false,
1046
+ "normalized": false,
1047
+ "rstrip": false,
1048
+ "single_word": false,
1049
+ "special": true
1050
+ },
1051
+ "128128": {
1052
+ "content": "<|gigatoken_129|>",
1053
+ "lstrip": false,
1054
+ "normalized": false,
1055
+ "rstrip": false,
1056
+ "single_word": false,
1057
+ "special": true
1058
+ },
1059
+ "128129": {
1060
+ "content": "<|gigatoken_130|>",
1061
+ "lstrip": false,
1062
+ "normalized": false,
1063
+ "rstrip": false,
1064
+ "single_word": false,
1065
+ "special": true
1066
+ },
1067
+ "128130": {
1068
+ "content": "<|gigatoken_131|>",
1069
+ "lstrip": false,
1070
+ "normalized": false,
1071
+ "rstrip": false,
1072
+ "single_word": false,
1073
+ "special": true
1074
+ },
1075
+ "128131": {
1076
+ "content": "<|gigatoken_132|>",
1077
+ "lstrip": false,
1078
+ "normalized": false,
1079
+ "rstrip": false,
1080
+ "single_word": false,
1081
+ "special": true
1082
+ },
1083
+ "128132": {
1084
+ "content": "<|gigatoken_133|>",
1085
+ "lstrip": false,
1086
+ "normalized": false,
1087
+ "rstrip": false,
1088
+ "single_word": false,
1089
+ "special": true
1090
+ },
1091
+ "128133": {
1092
+ "content": "<|gigatoken_134|>",
1093
+ "lstrip": false,
1094
+ "normalized": false,
1095
+ "rstrip": false,
1096
+ "single_word": false,
1097
+ "special": true
1098
+ },
1099
+ "128134": {
1100
+ "content": "<|gigatoken_135|>",
1101
+ "lstrip": false,
1102
+ "normalized": false,
1103
+ "rstrip": false,
1104
+ "single_word": false,
1105
+ "special": true
1106
+ },
1107
+ "128135": {
1108
+ "content": "<|gigatoken_136|>",
1109
+ "lstrip": false,
1110
+ "normalized": false,
1111
+ "rstrip": false,
1112
+ "single_word": false,
1113
+ "special": true
1114
+ },
1115
+ "128136": {
1116
+ "content": "<|gigatoken_137|>",
1117
+ "lstrip": false,
1118
+ "normalized": false,
1119
+ "rstrip": false,
1120
+ "single_word": false,
1121
+ "special": true
1122
+ },
1123
+ "128137": {
1124
+ "content": "<|gigatoken_138|>",
1125
+ "lstrip": false,
1126
+ "normalized": false,
1127
+ "rstrip": false,
1128
+ "single_word": false,
1129
+ "special": true
1130
+ },
1131
+ "128138": {
1132
+ "content": "<|gigatoken_139|>",
1133
+ "lstrip": false,
1134
+ "normalized": false,
1135
+ "rstrip": false,
1136
+ "single_word": false,
1137
+ "special": true
1138
+ },
1139
+ "128139": {
1140
+ "content": "<|gigatoken_140|>",
1141
+ "lstrip": false,
1142
+ "normalized": false,
1143
+ "rstrip": false,
1144
+ "single_word": false,
1145
+ "special": true
1146
+ },
1147
+ "128140": {
1148
+ "content": "<|gigatoken_141|>",
1149
+ "lstrip": false,
1150
+ "normalized": false,
1151
+ "rstrip": false,
1152
+ "single_word": false,
1153
+ "special": true
1154
+ },
1155
+ "128141": {
1156
+ "content": "<|gigatoken_142|>",
1157
+ "lstrip": false,
1158
+ "normalized": false,
1159
+ "rstrip": false,
1160
+ "single_word": false,
1161
+ "special": true
1162
+ },
1163
+ "128142": {
1164
+ "content": "<|gigatoken_143|>",
1165
+ "lstrip": false,
1166
+ "normalized": false,
1167
+ "rstrip": false,
1168
+ "single_word": false,
1169
+ "special": true
1170
+ },
1171
+ "128143": {
1172
+ "content": "<|gigatoken_144|>",
1173
+ "lstrip": false,
1174
+ "normalized": false,
1175
+ "rstrip": false,
1176
+ "single_word": false,
1177
+ "special": true
1178
+ },
1179
+ "128144": {
1180
+ "content": "<|gigatoken_145|>",
1181
+ "lstrip": false,
1182
+ "normalized": false,
1183
+ "rstrip": false,
1184
+ "single_word": false,
1185
+ "special": true
1186
+ },
1187
+ "128145": {
1188
+ "content": "<|gigatoken_146|>",
1189
+ "lstrip": false,
1190
+ "normalized": false,
1191
+ "rstrip": false,
1192
+ "single_word": false,
1193
+ "special": true
1194
+ },
1195
+ "128146": {
1196
+ "content": "<|gigatoken_147|>",
1197
+ "lstrip": false,
1198
+ "normalized": false,
1199
+ "rstrip": false,
1200
+ "single_word": false,
1201
+ "special": true
1202
+ },
1203
+ "128147": {
1204
+ "content": "<|gigatoken_148|>",
1205
+ "lstrip": false,
1206
+ "normalized": false,
1207
+ "rstrip": false,
1208
+ "single_word": false,
1209
+ "special": true
1210
+ },
1211
+ "128148": {
1212
+ "content": "<|gigatoken_149|>",
1213
+ "lstrip": false,
1214
+ "normalized": false,
1215
+ "rstrip": false,
1216
+ "single_word": false,
1217
+ "special": true
1218
+ },
1219
+ "128149": {
1220
+ "content": "<|gigatoken_150|>",
1221
+ "lstrip": false,
1222
+ "normalized": false,
1223
+ "rstrip": false,
1224
+ "single_word": false,
1225
+ "special": true
1226
+ },
1227
+ "128150": {
1228
+ "content": "<|gigatoken_151|>",
1229
+ "lstrip": false,
1230
+ "normalized": false,
1231
+ "rstrip": false,
1232
+ "single_word": false,
1233
+ "special": true
1234
+ },
1235
+ "128151": {
1236
+ "content": "<|gigatoken_152|>",
1237
+ "lstrip": false,
1238
+ "normalized": false,
1239
+ "rstrip": false,
1240
+ "single_word": false,
1241
+ "special": true
1242
+ },
1243
+ "128152": {
1244
+ "content": "<|gigatoken_153|>",
1245
+ "lstrip": false,
1246
+ "normalized": false,
1247
+ "rstrip": false,
1248
+ "single_word": false,
1249
+ "special": true
1250
+ },
1251
+ "128153": {
1252
+ "content": "<|gigatoken_154|>",
1253
+ "lstrip": false,
1254
+ "normalized": false,
1255
+ "rstrip": false,
1256
+ "single_word": false,
1257
+ "special": true
1258
+ },
1259
+ "128154": {
1260
+ "content": "<|gigatoken_155|>",
1261
+ "lstrip": false,
1262
+ "normalized": false,
1263
+ "rstrip": false,
1264
+ "single_word": false,
1265
+ "special": true
1266
+ },
1267
+ "128155": {
1268
+ "content": "<|gigatoken_156|>",
1269
+ "lstrip": false,
1270
+ "normalized": false,
1271
+ "rstrip": false,
1272
+ "single_word": false,
1273
+ "special": true
1274
+ },
1275
+ "128156": {
1276
+ "content": "<|gigatoken_157|>",
1277
+ "lstrip": false,
1278
+ "normalized": false,
1279
+ "rstrip": false,
1280
+ "single_word": false,
1281
+ "special": true
1282
+ },
1283
+ "128157": {
1284
+ "content": "<|gigatoken_158|>",
1285
+ "lstrip": false,
1286
+ "normalized": false,
1287
+ "rstrip": false,
1288
+ "single_word": false,
1289
+ "special": true
1290
+ },
1291
+ "128158": {
1292
+ "content": "<|gigatoken_159|>",
1293
+ "lstrip": false,
1294
+ "normalized": false,
1295
+ "rstrip": false,
1296
+ "single_word": false,
1297
+ "special": true
1298
+ },
1299
+ "128159": {
1300
+ "content": "<|gigatoken_160|>",
1301
+ "lstrip": false,
1302
+ "normalized": false,
1303
+ "rstrip": false,
1304
+ "single_word": false,
1305
+ "special": true
1306
+ },
1307
+ "128160": {
1308
+ "content": "<|gigatoken_161|>",
1309
+ "lstrip": false,
1310
+ "normalized": false,
1311
+ "rstrip": false,
1312
+ "single_word": false,
1313
+ "special": true
1314
+ },
1315
+ "128161": {
1316
+ "content": "<|gigatoken_162|>",
1317
+ "lstrip": false,
1318
+ "normalized": false,
1319
+ "rstrip": false,
1320
+ "single_word": false,
1321
+ "special": true
1322
+ },
1323
+ "128162": {
1324
+ "content": "<|gigatoken_163|>",
1325
+ "lstrip": false,
1326
+ "normalized": false,
1327
+ "rstrip": false,
1328
+ "single_word": false,
1329
+ "special": true
1330
+ },
1331
+ "128163": {
1332
+ "content": "<|gigatoken_164|>",
1333
+ "lstrip": false,
1334
+ "normalized": false,
1335
+ "rstrip": false,
1336
+ "single_word": false,
1337
+ "special": true
1338
+ },
1339
+ "128164": {
1340
+ "content": "<|gigatoken_165|>",
1341
+ "lstrip": false,
1342
+ "normalized": false,
1343
+ "rstrip": false,
1344
+ "single_word": false,
1345
+ "special": true
1346
+ },
1347
+ "128165": {
1348
+ "content": "<|gigatoken_166|>",
1349
+ "lstrip": false,
1350
+ "normalized": false,
1351
+ "rstrip": false,
1352
+ "single_word": false,
1353
+ "special": true
1354
+ },
1355
+ "128166": {
1356
+ "content": "<|gigatoken_167|>",
1357
+ "lstrip": false,
1358
+ "normalized": false,
1359
+ "rstrip": false,
1360
+ "single_word": false,
1361
+ "special": true
1362
+ },
1363
+ "128167": {
1364
+ "content": "<|gigatoken_168|>",
1365
+ "lstrip": false,
1366
+ "normalized": false,
1367
+ "rstrip": false,
1368
+ "single_word": false,
1369
+ "special": true
1370
+ },
1371
+ "128168": {
1372
+ "content": "<|gigatoken_169|>",
1373
+ "lstrip": false,
1374
+ "normalized": false,
1375
+ "rstrip": false,
1376
+ "single_word": false,
1377
+ "special": true
1378
+ },
1379
+ "128169": {
1380
+ "content": "<|gigatoken_170|>",
1381
+ "lstrip": false,
1382
+ "normalized": false,
1383
+ "rstrip": false,
1384
+ "single_word": false,
1385
+ "special": true
1386
+ },
1387
+ "128170": {
1388
+ "content": "<|gigatoken_171|>",
1389
+ "lstrip": false,
1390
+ "normalized": false,
1391
+ "rstrip": false,
1392
+ "single_word": false,
1393
+ "special": true
1394
+ },
1395
+ "128171": {
1396
+ "content": "<|gigatoken_172|>",
1397
+ "lstrip": false,
1398
+ "normalized": false,
1399
+ "rstrip": false,
1400
+ "single_word": false,
1401
+ "special": true
1402
+ },
1403
+ "128172": {
1404
+ "content": "<|gigatoken_173|>",
1405
+ "lstrip": false,
1406
+ "normalized": false,
1407
+ "rstrip": false,
1408
+ "single_word": false,
1409
+ "special": true
1410
+ },
1411
+ "128173": {
1412
+ "content": "<|gigatoken_174|>",
1413
+ "lstrip": false,
1414
+ "normalized": false,
1415
+ "rstrip": false,
1416
+ "single_word": false,
1417
+ "special": true
1418
+ },
1419
+ "128174": {
1420
+ "content": "<|gigatoken_175|>",
1421
+ "lstrip": false,
1422
+ "normalized": false,
1423
+ "rstrip": false,
1424
+ "single_word": false,
1425
+ "special": true
1426
+ },
1427
+ "128175": {
1428
+ "content": "<|gigatoken_176|>",
1429
+ "lstrip": false,
1430
+ "normalized": false,
1431
+ "rstrip": false,
1432
+ "single_word": false,
1433
+ "special": true
1434
+ },
1435
+ "128176": {
1436
+ "content": "<|gigatoken_177|>",
1437
+ "lstrip": false,
1438
+ "normalized": false,
1439
+ "rstrip": false,
1440
+ "single_word": false,
1441
+ "special": true
1442
+ },
1443
+ "128177": {
1444
+ "content": "<|gigatoken_178|>",
1445
+ "lstrip": false,
1446
+ "normalized": false,
1447
+ "rstrip": false,
1448
+ "single_word": false,
1449
+ "special": true
1450
+ },
1451
+ "128178": {
1452
+ "content": "<|gigatoken_179|>",
1453
+ "lstrip": false,
1454
+ "normalized": false,
1455
+ "rstrip": false,
1456
+ "single_word": false,
1457
+ "special": true
1458
+ },
1459
+ "128179": {
1460
+ "content": "<|gigatoken_180|>",
1461
+ "lstrip": false,
1462
+ "normalized": false,
1463
+ "rstrip": false,
1464
+ "single_word": false,
1465
+ "special": true
1466
+ },
1467
+ "128180": {
1468
+ "content": "<|gigatoken_181|>",
1469
+ "lstrip": false,
1470
+ "normalized": false,
1471
+ "rstrip": false,
1472
+ "single_word": false,
1473
+ "special": true
1474
+ },
1475
+ "128181": {
1476
+ "content": "<|gigatoken_182|>",
1477
+ "lstrip": false,
1478
+ "normalized": false,
1479
+ "rstrip": false,
1480
+ "single_word": false,
1481
+ "special": true
1482
+ },
1483
+ "128182": {
1484
+ "content": "<|gigatoken_183|>",
1485
+ "lstrip": false,
1486
+ "normalized": false,
1487
+ "rstrip": false,
1488
+ "single_word": false,
1489
+ "special": true
1490
+ },
1491
+ "128183": {
1492
+ "content": "<|gigatoken_184|>",
1493
+ "lstrip": false,
1494
+ "normalized": false,
1495
+ "rstrip": false,
1496
+ "single_word": false,
1497
+ "special": true
1498
+ },
1499
+ "128184": {
1500
+ "content": "<|gigatoken_185|>",
1501
+ "lstrip": false,
1502
+ "normalized": false,
1503
+ "rstrip": false,
1504
+ "single_word": false,
1505
+ "special": true
1506
+ },
1507
+ "128185": {
1508
+ "content": "<|gigatoken_186|>",
1509
+ "lstrip": false,
1510
+ "normalized": false,
1511
+ "rstrip": false,
1512
+ "single_word": false,
1513
+ "special": true
1514
+ },
1515
+ "128186": {
1516
+ "content": "<|gigatoken_187|>",
1517
+ "lstrip": false,
1518
+ "normalized": false,
1519
+ "rstrip": false,
1520
+ "single_word": false,
1521
+ "special": true
1522
+ },
1523
+ "128187": {
1524
+ "content": "<|gigatoken_188|>",
1525
+ "lstrip": false,
1526
+ "normalized": false,
1527
+ "rstrip": false,
1528
+ "single_word": false,
1529
+ "special": true
1530
+ },
1531
+ "128188": {
1532
+ "content": "<|gigatoken_189|>",
1533
+ "lstrip": false,
1534
+ "normalized": false,
1535
+ "rstrip": false,
1536
+ "single_word": false,
1537
+ "special": true
1538
+ },
1539
+ "128189": {
1540
+ "content": "<|gigatoken_190|>",
1541
+ "lstrip": false,
1542
+ "normalized": false,
1543
+ "rstrip": false,
1544
+ "single_word": false,
1545
+ "special": true
1546
+ },
1547
+ "128190": {
1548
+ "content": "<|gigatoken_191|>",
1549
+ "lstrip": false,
1550
+ "normalized": false,
1551
+ "rstrip": false,
1552
+ "single_word": false,
1553
+ "special": true
1554
+ },
1555
+ "128191": {
1556
+ "content": "<|gigatoken_192|>",
1557
+ "lstrip": false,
1558
+ "normalized": false,
1559
+ "rstrip": false,
1560
+ "single_word": false,
1561
+ "special": true
1562
+ },
1563
+ "128192": {
1564
+ "content": "<|gigatoken_193|>",
1565
+ "lstrip": false,
1566
+ "normalized": false,
1567
+ "rstrip": false,
1568
+ "single_word": false,
1569
+ "special": true
1570
+ },
1571
+ "128193": {
1572
+ "content": "<|gigatoken_194|>",
1573
+ "lstrip": false,
1574
+ "normalized": false,
1575
+ "rstrip": false,
1576
+ "single_word": false,
1577
+ "special": true
1578
+ },
1579
+ "128194": {
1580
+ "content": "<|gigatoken_195|>",
1581
+ "lstrip": false,
1582
+ "normalized": false,
1583
+ "rstrip": false,
1584
+ "single_word": false,
1585
+ "special": true
1586
+ },
1587
+ "128195": {
1588
+ "content": "<|gigatoken_196|>",
1589
+ "lstrip": false,
1590
+ "normalized": false,
1591
+ "rstrip": false,
1592
+ "single_word": false,
1593
+ "special": true
1594
+ },
1595
+ "128196": {
1596
+ "content": "<|gigatoken_197|>",
1597
+ "lstrip": false,
1598
+ "normalized": false,
1599
+ "rstrip": false,
1600
+ "single_word": false,
1601
+ "special": true
1602
+ },
1603
+ "128197": {
1604
+ "content": "<|gigatoken_198|>",
1605
+ "lstrip": false,
1606
+ "normalized": false,
1607
+ "rstrip": false,
1608
+ "single_word": false,
1609
+ "special": true
1610
+ },
1611
+ "128198": {
1612
+ "content": "<|gigatoken_199|>",
1613
+ "lstrip": false,
1614
+ "normalized": false,
1615
+ "rstrip": false,
1616
+ "single_word": false,
1617
+ "special": true
1618
+ },
1619
+ "128199": {
1620
+ "content": "<|gigatoken_200|>",
1621
+ "lstrip": false,
1622
+ "normalized": false,
1623
+ "rstrip": false,
1624
+ "single_word": false,
1625
+ "special": true
1626
+ },
1627
+ "128200": {
1628
+ "content": "<|gigatoken_201|>",
1629
+ "lstrip": false,
1630
+ "normalized": false,
1631
+ "rstrip": false,
1632
+ "single_word": false,
1633
+ "special": true
1634
+ },
1635
+ "128201": {
1636
+ "content": "<|gigatoken_202|>",
1637
+ "lstrip": false,
1638
+ "normalized": false,
1639
+ "rstrip": false,
1640
+ "single_word": false,
1641
+ "special": true
1642
+ },
1643
+ "128202": {
1644
+ "content": "<|gigatoken_203|>",
1645
+ "lstrip": false,
1646
+ "normalized": false,
1647
+ "rstrip": false,
1648
+ "single_word": false,
1649
+ "special": true
1650
+ },
1651
+ "128203": {
1652
+ "content": "<|gigatoken_204|>",
1653
+ "lstrip": false,
1654
+ "normalized": false,
1655
+ "rstrip": false,
1656
+ "single_word": false,
1657
+ "special": true
1658
+ },
1659
+ "128204": {
1660
+ "content": "<|gigatoken_205|>",
1661
+ "lstrip": false,
1662
+ "normalized": false,
1663
+ "rstrip": false,
1664
+ "single_word": false,
1665
+ "special": true
1666
+ },
1667
+ "128205": {
1668
+ "content": "<|gigatoken_206|>",
1669
+ "lstrip": false,
1670
+ "normalized": false,
1671
+ "rstrip": false,
1672
+ "single_word": false,
1673
+ "special": true
1674
+ },
1675
+ "128206": {
1676
+ "content": "<|gigatoken_207|>",
1677
+ "lstrip": false,
1678
+ "normalized": false,
1679
+ "rstrip": false,
1680
+ "single_word": false,
1681
+ "special": true
1682
+ },
1683
+ "128207": {
1684
+ "content": "<|gigatoken_208|>",
1685
+ "lstrip": false,
1686
+ "normalized": false,
1687
+ "rstrip": false,
1688
+ "single_word": false,
1689
+ "special": true
1690
+ },
1691
+ "128208": {
1692
+ "content": "<|gigatoken_209|>",
1693
+ "lstrip": false,
1694
+ "normalized": false,
1695
+ "rstrip": false,
1696
+ "single_word": false,
1697
+ "special": true
1698
+ },
1699
+ "128209": {
1700
+ "content": "<|gigatoken_210|>",
1701
+ "lstrip": false,
1702
+ "normalized": false,
1703
+ "rstrip": false,
1704
+ "single_word": false,
1705
+ "special": true
1706
+ },
1707
+ "128210": {
1708
+ "content": "<|gigatoken_211|>",
1709
+ "lstrip": false,
1710
+ "normalized": false,
1711
+ "rstrip": false,
1712
+ "single_word": false,
1713
+ "special": true
1714
+ },
1715
+ "128211": {
1716
+ "content": "<|gigatoken_212|>",
1717
+ "lstrip": false,
1718
+ "normalized": false,
1719
+ "rstrip": false,
1720
+ "single_word": false,
1721
+ "special": true
1722
+ },
1723
+ "128212": {
1724
+ "content": "<|gigatoken_213|>",
1725
+ "lstrip": false,
1726
+ "normalized": false,
1727
+ "rstrip": false,
1728
+ "single_word": false,
1729
+ "special": true
1730
+ },
1731
+ "128213": {
1732
+ "content": "<|gigatoken_214|>",
1733
+ "lstrip": false,
1734
+ "normalized": false,
1735
+ "rstrip": false,
1736
+ "single_word": false,
1737
+ "special": true
1738
+ },
1739
+ "128214": {
1740
+ "content": "<|gigatoken_215|>",
1741
+ "lstrip": false,
1742
+ "normalized": false,
1743
+ "rstrip": false,
1744
+ "single_word": false,
1745
+ "special": true
1746
+ },
1747
+ "128215": {
1748
+ "content": "<|gigatoken_216|>",
1749
+ "lstrip": false,
1750
+ "normalized": false,
1751
+ "rstrip": false,
1752
+ "single_word": false,
1753
+ "special": true
1754
+ },
1755
+ "128216": {
1756
+ "content": "<|gigatoken_217|>",
1757
+ "lstrip": false,
1758
+ "normalized": false,
1759
+ "rstrip": false,
1760
+ "single_word": false,
1761
+ "special": true
1762
+ },
1763
+ "128217": {
1764
+ "content": "<|gigatoken_218|>",
1765
+ "lstrip": false,
1766
+ "normalized": false,
1767
+ "rstrip": false,
1768
+ "single_word": false,
1769
+ "special": true
1770
+ },
1771
+ "128218": {
1772
+ "content": "<|gigatoken_219|>",
1773
+ "lstrip": false,
1774
+ "normalized": false,
1775
+ "rstrip": false,
1776
+ "single_word": false,
1777
+ "special": true
1778
+ },
1779
+ "128219": {
1780
+ "content": "<|gigatoken_220|>",
1781
+ "lstrip": false,
1782
+ "normalized": false,
1783
+ "rstrip": false,
1784
+ "single_word": false,
1785
+ "special": true
1786
+ },
1787
+ "128220": {
1788
+ "content": "<|gigatoken_221|>",
1789
+ "lstrip": false,
1790
+ "normalized": false,
1791
+ "rstrip": false,
1792
+ "single_word": false,
1793
+ "special": true
1794
+ },
1795
+ "128221": {
1796
+ "content": "<|gigatoken_222|>",
1797
+ "lstrip": false,
1798
+ "normalized": false,
1799
+ "rstrip": false,
1800
+ "single_word": false,
1801
+ "special": true
1802
+ },
1803
+ "128222": {
1804
+ "content": "<|gigatoken_223|>",
1805
+ "lstrip": false,
1806
+ "normalized": false,
1807
+ "rstrip": false,
1808
+ "single_word": false,
1809
+ "special": true
1810
+ },
1811
+ "128223": {
1812
+ "content": "<|gigatoken_224|>",
1813
+ "lstrip": false,
1814
+ "normalized": false,
1815
+ "rstrip": false,
1816
+ "single_word": false,
1817
+ "special": true
1818
+ },
1819
+ "128224": {
1820
+ "content": "<|gigatoken_225|>",
1821
+ "lstrip": false,
1822
+ "normalized": false,
1823
+ "rstrip": false,
1824
+ "single_word": false,
1825
+ "special": true
1826
+ },
1827
+ "128225": {
1828
+ "content": "<|gigatoken_226|>",
1829
+ "lstrip": false,
1830
+ "normalized": false,
1831
+ "rstrip": false,
1832
+ "single_word": false,
1833
+ "special": true
1834
+ },
1835
+ "128226": {
1836
+ "content": "<|gigatoken_227|>",
1837
+ "lstrip": false,
1838
+ "normalized": false,
1839
+ "rstrip": false,
1840
+ "single_word": false,
1841
+ "special": true
1842
+ },
1843
+ "128227": {
1844
+ "content": "<|gigatoken_228|>",
1845
+ "lstrip": false,
1846
+ "normalized": false,
1847
+ "rstrip": false,
1848
+ "single_word": false,
1849
+ "special": true
1850
+ },
1851
+ "128228": {
1852
+ "content": "<|gigatoken_229|>",
1853
+ "lstrip": false,
1854
+ "normalized": false,
1855
+ "rstrip": false,
1856
+ "single_word": false,
1857
+ "special": true
1858
+ },
1859
+ "128229": {
1860
+ "content": "<|gigatoken_230|>",
1861
+ "lstrip": false,
1862
+ "normalized": false,
1863
+ "rstrip": false,
1864
+ "single_word": false,
1865
+ "special": true
1866
+ },
1867
+ "128230": {
1868
+ "content": "<|gigatoken_231|>",
1869
+ "lstrip": false,
1870
+ "normalized": false,
1871
+ "rstrip": false,
1872
+ "single_word": false,
1873
+ "special": true
1874
+ },
1875
+ "128231": {
1876
+ "content": "<|gigatoken_232|>",
1877
+ "lstrip": false,
1878
+ "normalized": false,
1879
+ "rstrip": false,
1880
+ "single_word": false,
1881
+ "special": true
1882
+ },
1883
+ "128232": {
1884
+ "content": "<|gigatoken_233|>",
1885
+ "lstrip": false,
1886
+ "normalized": false,
1887
+ "rstrip": false,
1888
+ "single_word": false,
1889
+ "special": true
1890
+ },
1891
+ "128233": {
1892
+ "content": "<|gigatoken_234|>",
1893
+ "lstrip": false,
1894
+ "normalized": false,
1895
+ "rstrip": false,
1896
+ "single_word": false,
1897
+ "special": true
1898
+ },
1899
+ "128234": {
1900
+ "content": "<|gigatoken_235|>",
1901
+ "lstrip": false,
1902
+ "normalized": false,
1903
+ "rstrip": false,
1904
+ "single_word": false,
1905
+ "special": true
1906
+ },
1907
+ "128235": {
1908
+ "content": "<|gigatoken_236|>",
1909
+ "lstrip": false,
1910
+ "normalized": false,
1911
+ "rstrip": false,
1912
+ "single_word": false,
1913
+ "special": true
1914
+ },
1915
+ "128236": {
1916
+ "content": "<|gigatoken_237|>",
1917
+ "lstrip": false,
1918
+ "normalized": false,
1919
+ "rstrip": false,
1920
+ "single_word": false,
1921
+ "special": true
1922
+ },
1923
+ "128237": {
1924
+ "content": "<|gigatoken_238|>",
1925
+ "lstrip": false,
1926
+ "normalized": false,
1927
+ "rstrip": false,
1928
+ "single_word": false,
1929
+ "special": true
1930
+ },
1931
+ "128238": {
1932
+ "content": "<|gigatoken_239|>",
1933
+ "lstrip": false,
1934
+ "normalized": false,
1935
+ "rstrip": false,
1936
+ "single_word": false,
1937
+ "special": true
1938
+ },
1939
+ "128239": {
1940
+ "content": "<|gigatoken_240|>",
1941
+ "lstrip": false,
1942
+ "normalized": false,
1943
+ "rstrip": false,
1944
+ "single_word": false,
1945
+ "special": true
1946
+ },
1947
+ "128240": {
1948
+ "content": "<|gigatoken_241|>",
1949
+ "lstrip": false,
1950
+ "normalized": false,
1951
+ "rstrip": false,
1952
+ "single_word": false,
1953
+ "special": true
1954
+ },
1955
+ "128241": {
1956
+ "content": "<|gigatoken_242|>",
1957
+ "lstrip": false,
1958
+ "normalized": false,
1959
+ "rstrip": false,
1960
+ "single_word": false,
1961
+ "special": true
1962
+ },
1963
+ "128242": {
1964
+ "content": "<|gigatoken_243|>",
1965
+ "lstrip": false,
1966
+ "normalized": false,
1967
+ "rstrip": false,
1968
+ "single_word": false,
1969
+ "special": true
1970
+ },
1971
+ "128243": {
1972
+ "content": "<|gigatoken_244|>",
1973
+ "lstrip": false,
1974
+ "normalized": false,
1975
+ "rstrip": false,
1976
+ "single_word": false,
1977
+ "special": true
1978
+ },
1979
+ "128244": {
1980
+ "content": "<|gigatoken_245|>",
1981
+ "lstrip": false,
1982
+ "normalized": false,
1983
+ "rstrip": false,
1984
+ "single_word": false,
1985
+ "special": true
1986
+ },
1987
+ "128245": {
1988
+ "content": "<|gigatoken_246|>",
1989
+ "lstrip": false,
1990
+ "normalized": false,
1991
+ "rstrip": false,
1992
+ "single_word": false,
1993
+ "special": true
1994
+ },
1995
+ "128246": {
1996
+ "content": "<|gigatoken_247|>",
1997
+ "lstrip": false,
1998
+ "normalized": false,
1999
+ "rstrip": false,
2000
+ "single_word": false,
2001
+ "special": true
2002
+ },
2003
+ "128247": {
2004
+ "content": "<|gigatoken_248|>",
2005
+ "lstrip": false,
2006
+ "normalized": false,
2007
+ "rstrip": false,
2008
+ "single_word": false,
2009
+ "special": true
2010
+ },
2011
+ "128248": {
2012
+ "content": "<|gigatoken_249|>",
2013
+ "lstrip": false,
2014
+ "normalized": false,
2015
+ "rstrip": false,
2016
+ "single_word": false,
2017
+ "special": true
2018
+ },
2019
+ "128249": {
2020
+ "content": "<|gigatoken_250|>",
2021
+ "lstrip": false,
2022
+ "normalized": false,
2023
+ "rstrip": false,
2024
+ "single_word": false,
2025
+ "special": true
2026
+ },
2027
+ "128250": {
2028
+ "content": "<|gigatoken_251|>",
2029
+ "lstrip": false,
2030
+ "normalized": false,
2031
+ "rstrip": false,
2032
+ "single_word": false,
2033
+ "special": true
2034
+ },
2035
+ "128251": {
2036
+ "content": "<|gigatoken_252|>",
2037
+ "lstrip": false,
2038
+ "normalized": false,
2039
+ "rstrip": false,
2040
+ "single_word": false,
2041
+ "special": true
2042
+ },
2043
+ "128252": {
2044
+ "content": "<|gigatoken_253|>",
2045
+ "lstrip": false,
2046
+ "normalized": false,
2047
+ "rstrip": false,
2048
+ "single_word": false,
2049
+ "special": true
2050
+ },
2051
+ "128253": {
2052
+ "content": "<|gigatoken_254|>",
2053
+ "lstrip": false,
2054
+ "normalized": false,
2055
+ "rstrip": false,
2056
+ "single_word": false,
2057
+ "special": true
2058
+ },
2059
+ "128254": {
2060
+ "content": "<|gigatoken_255|>",
2061
+ "lstrip": false,
2062
+ "normalized": false,
2063
+ "rstrip": false,
2064
+ "single_word": false,
2065
+ "special": true
2066
+ },
2067
+ "128255": {
2068
+ "content": "<|gigatoken_256|>",
2069
+ "lstrip": false,
2070
+ "normalized": false,
2071
+ "rstrip": false,
2072
+ "single_word": false,
2073
+ "special": true
2074
+ }
2075
+ },
2076
+ "bos_token": "<s>",
2077
+ "clean_up_tokenization_spaces": true,
2078
+ "eos_token": "</s>",
2079
+ "model_max_length": 1000000000000000019884624838656,
2080
+ "tokenizer_class": "PreTrainedTokenizerFast",
2081
+ "unk_token": "<unk>"
2082
+ }