soumitsr committed on
Commit
76fd0eb
·
verified ·
1 Parent(s): efd4f54

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +3 -18
  2. tokenizer_config.json +4 -18
special_tokens_map.json CHANGED
@@ -1,32 +1,17 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|endoftext|>",
4
  "<|im_start|>",
5
- "<|im_end|>",
6
- "<repo_name>",
7
- "<reponame>",
8
- "<file_sep>",
9
- "<filename>",
10
- "<gh_stars>",
11
- "<issue_start>",
12
- "<issue_comment>",
13
- "<issue_closed>",
14
- "<jupyter_start>",
15
- "<jupyter_text>",
16
- "<jupyter_code>",
17
- "<jupyter_output>",
18
- "<jupyter_script>",
19
- "<empty_output>"
20
  ],
21
  "bos_token": {
22
- "content": "<|endoftext|>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
26
  "single_word": false
27
  },
28
  "eos_token": {
29
- "content": "<|endoftext|>",
30
  "lstrip": false,
31
  "normalized": false,
32
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
 
3
  "<|im_start|>",
4
+ "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  ],
6
  "bos_token": {
7
+ "content": "<|im_start|>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
11
  "single_word": false
12
  },
13
  "eos_token": {
14
+ "content": "<|im_end|>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
tokenizer_config.json CHANGED
@@ -155,27 +155,13 @@
155
  }
156
  },
157
  "additional_special_tokens": [
158
- "<|endoftext|>",
159
  "<|im_start|>",
160
- "<|im_end|>",
161
- "<repo_name>",
162
- "<reponame>",
163
- "<file_sep>",
164
- "<filename>",
165
- "<gh_stars>",
166
- "<issue_start>",
167
- "<issue_comment>",
168
- "<issue_closed>",
169
- "<jupyter_start>",
170
- "<jupyter_text>",
171
- "<jupyter_code>",
172
- "<jupyter_output>",
173
- "<jupyter_script>",
174
- "<empty_output>"
175
  ],
176
- "bos_token": "<|endoftext|>",
 
177
  "clean_up_tokenization_spaces": false,
178
- "eos_token": "<|endoftext|>",
179
  "model_max_length": 16384,
180
  "pad_token": "<|PAD_TOKEN|>",
181
  "padding_side": "left",
 
155
  }
156
  },
157
  "additional_special_tokens": [
 
158
  "<|im_start|>",
159
+ "<|im_end|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  ],
161
+ "bos_token": "<|im_start|>",
162
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
163
  "clean_up_tokenization_spaces": false,
164
+ "eos_token": "<|im_end|>",
165
  "model_max_length": 16384,
166
  "pad_token": "<|PAD_TOKEN|>",
167
  "padding_side": "left",