shi-zheng-qxhs committed on
Commit
2cec65d
1 Parent(s): 4cfed30

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
- "<|assistant|>": 50258,
3
  "<|end|>": 50259,
 
4
  "<|user|>": 50257
5
  }
 
1
  {
2
+ "<|assistant|>": 50260,
3
  "<|end|>": 50259,
4
+ "<|system|>": 50258,
5
  "<|user|>": 50257
6
  }
special_tokens_map.json CHANGED
@@ -8,7 +8,7 @@
8
  "single_word": false
9
  },
10
  {
11
- "content": "<|assistant|>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
@@ -20,6 +20,13 @@
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
 
 
 
 
 
 
 
23
  }
24
  ],
25
  "bos_token": "<|endoftext|>",
 
8
  "single_word": false
9
  },
10
  {
11
+ "content": "<|system|>",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
 
20
  "normalized": false,
21
  "rstrip": false,
22
  "single_word": false
23
+ },
24
+ {
25
+ "content": "<|assistant|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
  }
31
  ],
32
  "bos_token": "<|endoftext|>",
tokenizer.json CHANGED
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 50258,
26
- "content": "<|assistant|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -38,6 +38,15 @@
38
  "rstrip": false,
39
  "normalized": false,
40
  "special": true
 
 
 
 
 
 
 
 
 
41
  }
42
  ],
43
  "normalizer": null,
 
23
  },
24
  {
25
  "id": 50258,
26
+ "content": "<|system|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
38
  "rstrip": false,
39
  "normalized": false,
40
  "special": true
41
+ },
42
+ {
43
+ "id": 50260,
44
+ "content": "<|assistant|>",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
  }
51
  ],
52
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -18,7 +18,7 @@
18
  "special": true
19
  },
20
  "50258": {
21
- "content": "<|assistant|>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
@@ -32,14 +32,24 @@
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
 
 
 
 
 
 
 
 
35
  }
36
  },
37
  "additional_special_tokens": [
38
  "<|user|>",
39
- "<|assistant|>",
40
- "<|end|>"
 
41
  ],
42
  "bos_token": "<|endoftext|>",
 
43
  "clean_up_tokenization_spaces": true,
44
  "eos_token": "<|endoftext|>",
45
  "model_max_length": 1024,
 
18
  "special": true
19
  },
20
  "50258": {
21
+ "content": "<|system|>",
22
  "lstrip": false,
23
  "normalized": false,
24
  "rstrip": false,
 
32
  "rstrip": false,
33
  "single_word": false,
34
  "special": true
35
+ },
36
+ "50260": {
37
+ "content": "<|assistant|>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
  }
44
  },
45
  "additional_special_tokens": [
46
  "<|user|>",
47
+ "<|system|>",
48
+ "<|end|>",
49
+ "<|assistant|>"
50
  ],
51
  "bos_token": "<|endoftext|>",
52
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|endoftext|><|user|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'system' %}{{ '<|system|>\n' + message['content'] + '<|end|>\n' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>\n' + message['content'] + '<|end|><|endoftext|>\n' }}{% endif %}{% endfor %}",
53
  "clean_up_tokenization_spaces": true,
54
  "eos_token": "<|endoftext|>",
55
  "model_max_length": 1024,