hooking-dev committed on
Commit
c1705df
1 Parent(s): 8900a5f

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +3 -21
  2. tokenizer.json +1 -1
  3. tokenizer_config.json +7 -10
special_tokens_map.json CHANGED
@@ -1,23 +1,5 @@
1
  {
2
- "bos_token": {
3
- "content": "<|begin_of_text|>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "<|end_of_text|>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "pad_token": {
17
- "content": "<|end_of_text|>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
  }
 
1
  {
2
+ "bos_token": "<|begin_of_text|>",
3
+ "eos_token": "<|im_end|>",
4
+ "pad_token": "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  }
tokenizer.json CHANGED
@@ -14,7 +14,7 @@
14
  },
15
  {
16
  "id": 128001,
17
- "content": "<|end_of_text|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
 
14
  },
15
  {
16
  "id": 128001,
17
+ "content": "<|im_end|>",
18
  "single_word": false,
19
  "lstrip": false,
20
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -9,7 +9,7 @@
9
  "special": true
10
  },
11
  "128001": {
12
- "content": "<|end_of_text|>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
@@ -2050,14 +2050,11 @@
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
 
2053
  "clean_up_tokenization_spaces": true,
2054
- "eos_token": "<|end_of_text|>",
2055
- "model_input_names": [
2056
- "input_ids",
2057
- "attention_mask"
2058
- ],
2059
- "model_max_length": 8192,
2060
- "pad_token": "<|end_of_text|>",
2061
- "padding_side": "left",
2062
- "tokenizer_class": "PreTrainedTokenizerFast"
2063
  }
 
9
  "special": true
10
  },
11
  "128001": {
12
+ "content": "<|im_end|>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
 
2050
  }
2051
  },
2052
  "bos_token": "<|begin_of_text|>",
2053
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|im_start|>assistant\n' + message['content'] + '<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
+ "eos_token": "<|im_end|>",
2056
+ "model_max_length": 1000000000000000019884624838656,
2057
+ "pad_token": "<|im_end|>",
2058
+ "tokenizer_class": "PreTrainedTokenizerFast",
2059
+ "unk_token": null
 
 
 
 
2060
  }