danielhanchen committed on
Commit
07eb824
1 Parent(s): c4d5b88

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +12 -12
  2. tokenizer_config.json +14 -13
tokenizer.json CHANGED
@@ -26,9 +26,9 @@
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
- "rstrip": false,
30
  "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 32000,
@@ -44,7 +44,7 @@
44
  "content": "<|assistant|>",
45
  "single_word": false,
46
  "lstrip": false,
47
- "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
  },
@@ -53,7 +53,7 @@
53
  "content": "<|placeholder1|>",
54
  "single_word": false,
55
  "lstrip": false,
56
- "rstrip": false,
57
  "normalized": false,
58
  "special": true
59
  },
@@ -62,7 +62,7 @@
62
  "content": "<|placeholder2|>",
63
  "single_word": false,
64
  "lstrip": false,
65
- "rstrip": false,
66
  "normalized": false,
67
  "special": true
68
  },
@@ -71,7 +71,7 @@
71
  "content": "<|placeholder3|>",
72
  "single_word": false,
73
  "lstrip": false,
74
- "rstrip": false,
75
  "normalized": false,
76
  "special": true
77
  },
@@ -80,7 +80,7 @@
80
  "content": "<|placeholder4|>",
81
  "single_word": false,
82
  "lstrip": false,
83
- "rstrip": false,
84
  "normalized": false,
85
  "special": true
86
  },
@@ -89,7 +89,7 @@
89
  "content": "<|system|>",
90
  "single_word": false,
91
  "lstrip": false,
92
- "rstrip": false,
93
  "normalized": false,
94
  "special": true
95
  },
@@ -98,7 +98,7 @@
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
- "rstrip": false,
102
  "normalized": false,
103
  "special": true
104
  },
@@ -107,7 +107,7 @@
107
  "content": "<|placeholder5|>",
108
  "single_word": false,
109
  "lstrip": false,
110
- "rstrip": false,
111
  "normalized": false,
112
  "special": true
113
  },
@@ -116,7 +116,7 @@
116
  "content": "<|placeholder6|>",
117
  "single_word": false,
118
  "lstrip": false,
119
- "rstrip": false,
120
  "normalized": false,
121
  "special": true
122
  },
@@ -125,7 +125,7 @@
125
  "content": "<|user|>",
126
  "single_word": false,
127
  "lstrip": false,
128
- "rstrip": false,
129
  "normalized": false,
130
  "special": true
131
  }
 
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
+ "rstrip": true,
30
  "normalized": false,
31
+ "special": false
32
  },
33
  {
34
  "id": 32000,
 
44
  "content": "<|assistant|>",
45
  "single_word": false,
46
  "lstrip": false,
47
+ "rstrip": true,
48
  "normalized": false,
49
  "special": true
50
  },
 
53
  "content": "<|placeholder1|>",
54
  "single_word": false,
55
  "lstrip": false,
56
+ "rstrip": true,
57
  "normalized": false,
58
  "special": true
59
  },
 
62
  "content": "<|placeholder2|>",
63
  "single_word": false,
64
  "lstrip": false,
65
+ "rstrip": true,
66
  "normalized": false,
67
  "special": true
68
  },
 
71
  "content": "<|placeholder3|>",
72
  "single_word": false,
73
  "lstrip": false,
74
+ "rstrip": true,
75
  "normalized": false,
76
  "special": true
77
  },
 
80
  "content": "<|placeholder4|>",
81
  "single_word": false,
82
  "lstrip": false,
83
+ "rstrip": true,
84
  "normalized": false,
85
  "special": true
86
  },
 
89
  "content": "<|system|>",
90
  "single_word": false,
91
  "lstrip": false,
92
+ "rstrip": true,
93
  "normalized": false,
94
  "special": true
95
  },
 
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
+ "rstrip": true,
102
  "normalized": false,
103
  "special": true
104
  },
 
107
  "content": "<|placeholder5|>",
108
  "single_word": false,
109
  "lstrip": false,
110
+ "rstrip": true,
111
  "normalized": false,
112
  "special": true
113
  },
 
116
  "content": "<|placeholder6|>",
117
  "single_word": false,
118
  "lstrip": false,
119
+ "rstrip": true,
120
  "normalized": false,
121
  "special": true
122
  },
 
125
  "content": "<|user|>",
126
  "single_word": false,
127
  "lstrip": false,
128
+ "rstrip": true,
129
  "normalized": false,
130
  "special": true
131
  }
tokenizer_config.json CHANGED
@@ -22,9 +22,9 @@
22
  "content": "</s>",
23
  "lstrip": false,
24
  "normalized": false,
25
- "rstrip": false,
26
  "single_word": false,
27
- "special": true
28
  },
29
  "32000": {
30
  "content": "<|endoftext|>",
@@ -38,7 +38,7 @@
38
  "content": "<|assistant|>",
39
  "lstrip": false,
40
  "normalized": false,
41
- "rstrip": false,
42
  "single_word": false,
43
  "special": true
44
  },
@@ -46,7 +46,7 @@
46
  "content": "<|placeholder1|>",
47
  "lstrip": false,
48
  "normalized": false,
49
- "rstrip": false,
50
  "single_word": false,
51
  "special": true
52
  },
@@ -54,7 +54,7 @@
54
  "content": "<|placeholder2|>",
55
  "lstrip": false,
56
  "normalized": false,
57
- "rstrip": false,
58
  "single_word": false,
59
  "special": true
60
  },
@@ -62,7 +62,7 @@
62
  "content": "<|placeholder3|>",
63
  "lstrip": false,
64
  "normalized": false,
65
- "rstrip": false,
66
  "single_word": false,
67
  "special": true
68
  },
@@ -70,7 +70,7 @@
70
  "content": "<|placeholder4|>",
71
  "lstrip": false,
72
  "normalized": false,
73
- "rstrip": false,
74
  "single_word": false,
75
  "special": true
76
  },
@@ -78,7 +78,7 @@
78
  "content": "<|system|>",
79
  "lstrip": false,
80
  "normalized": false,
81
- "rstrip": false,
82
  "single_word": false,
83
  "special": true
84
  },
@@ -86,7 +86,7 @@
86
  "content": "<|end|>",
87
  "lstrip": false,
88
  "normalized": false,
89
- "rstrip": false,
90
  "single_word": false,
91
  "special": true
92
  },
@@ -94,7 +94,7 @@
94
  "content": "<|placeholder5|>",
95
  "lstrip": false,
96
  "normalized": false,
97
- "rstrip": false,
98
  "single_word": false,
99
  "special": true
100
  },
@@ -102,7 +102,7 @@
102
  "content": "<|placeholder6|>",
103
  "lstrip": false,
104
  "normalized": false,
105
- "rstrip": false,
106
  "single_word": false,
107
  "special": true
108
  },
@@ -110,15 +110,16 @@
110
  "content": "<|user|>",
111
  "lstrip": false,
112
  "normalized": false,
113
- "rstrip": false,
114
  "single_word": false,
115
  "special": true
116
  }
117
  },
118
  "bos_token": "<s>",
119
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] in ['user', 'system']) %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
120
  "clean_up_tokenization_spaces": false,
121
  "eos_token": "<|endoftext|>",
 
122
  "model_max_length": 4096,
123
  "pad_token": "<|endoftext|>",
124
  "padding_side": "left",
 
22
  "content": "</s>",
23
  "lstrip": false,
24
  "normalized": false,
25
+ "rstrip": true,
26
  "single_word": false,
27
+ "special": false
28
  },
29
  "32000": {
30
  "content": "<|endoftext|>",
 
38
  "content": "<|assistant|>",
39
  "lstrip": false,
40
  "normalized": false,
41
+ "rstrip": true,
42
  "single_word": false,
43
  "special": true
44
  },
 
46
  "content": "<|placeholder1|>",
47
  "lstrip": false,
48
  "normalized": false,
49
+ "rstrip": true,
50
  "single_word": false,
51
  "special": true
52
  },
 
54
  "content": "<|placeholder2|>",
55
  "lstrip": false,
56
  "normalized": false,
57
+ "rstrip": true,
58
  "single_word": false,
59
  "special": true
60
  },
 
62
  "content": "<|placeholder3|>",
63
  "lstrip": false,
64
  "normalized": false,
65
+ "rstrip": true,
66
  "single_word": false,
67
  "special": true
68
  },
 
70
  "content": "<|placeholder4|>",
71
  "lstrip": false,
72
  "normalized": false,
73
+ "rstrip": true,
74
  "single_word": false,
75
  "special": true
76
  },
 
78
  "content": "<|system|>",
79
  "lstrip": false,
80
  "normalized": false,
81
+ "rstrip": true,
82
  "single_word": false,
83
  "special": true
84
  },
 
86
  "content": "<|end|>",
87
  "lstrip": false,
88
  "normalized": false,
89
+ "rstrip": true,
90
  "single_word": false,
91
  "special": true
92
  },
 
94
  "content": "<|placeholder5|>",
95
  "lstrip": false,
96
  "normalized": false,
97
+ "rstrip": true,
98
  "single_word": false,
99
  "special": true
100
  },
 
102
  "content": "<|placeholder6|>",
103
  "lstrip": false,
104
  "normalized": false,
105
+ "rstrip": true,
106
  "single_word": false,
107
  "special": true
108
  },
 
110
  "content": "<|user|>",
111
  "lstrip": false,
112
  "normalized": false,
113
+ "rstrip": true,
114
  "single_word": false,
115
  "special": true
116
  }
117
  },
118
  "bos_token": "<s>",
119
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
120
  "clean_up_tokenization_spaces": false,
121
  "eos_token": "<|endoftext|>",
122
+ "legacy": false,
123
  "model_max_length": 4096,
124
  "pad_token": "<|endoftext|>",
125
  "padding_side": "left",