yujiepan committed on
Commit
7244d2c
1 Parent(s): 34bf872

Upload tokenizer

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. tokenizer.json +12 -12
  3. tokenizer_config.json +14 -13
README.md CHANGED
@@ -1,11 +1,11 @@
1
  ---
 
2
  pipeline_tag: text-generation
3
  inference: true
4
  widget:
5
- - text: 'Hello!'
6
  example_title: Hello world
7
  group: Python
8
- library_name: transformers
9
  ---
10
 
11
  This model is randomly initialized, using the config from [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) but with smaller size.
 
1
  ---
2
+ library_name: transformers
3
  pipeline_tag: text-generation
4
  inference: true
5
  widget:
6
+ - text: Hello!
7
  example_title: Hello world
8
  group: Python
 
9
  ---
10
 
11
  This model is randomly initialized, using the config from [microsoft/Phi-3-mini-128k-instruct](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct) but with smaller size.
tokenizer.json CHANGED
@@ -26,9 +26,9 @@
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
- "rstrip": false,
30
  "normalized": false,
31
- "special": true
32
  },
33
  {
34
  "id": 32000,
@@ -44,7 +44,7 @@
44
  "content": "<|assistant|>",
45
  "single_word": false,
46
  "lstrip": false,
47
- "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
  },
@@ -53,7 +53,7 @@
53
  "content": "<|placeholder1|>",
54
  "single_word": false,
55
  "lstrip": false,
56
- "rstrip": false,
57
  "normalized": false,
58
  "special": true
59
  },
@@ -62,7 +62,7 @@
62
  "content": "<|placeholder2|>",
63
  "single_word": false,
64
  "lstrip": false,
65
- "rstrip": false,
66
  "normalized": false,
67
  "special": true
68
  },
@@ -71,7 +71,7 @@
71
  "content": "<|placeholder3|>",
72
  "single_word": false,
73
  "lstrip": false,
74
- "rstrip": false,
75
  "normalized": false,
76
  "special": true
77
  },
@@ -80,7 +80,7 @@
80
  "content": "<|placeholder4|>",
81
  "single_word": false,
82
  "lstrip": false,
83
- "rstrip": false,
84
  "normalized": false,
85
  "special": true
86
  },
@@ -89,7 +89,7 @@
89
  "content": "<|system|>",
90
  "single_word": false,
91
  "lstrip": false,
92
- "rstrip": false,
93
  "normalized": false,
94
  "special": true
95
  },
@@ -98,7 +98,7 @@
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
- "rstrip": false,
102
  "normalized": false,
103
  "special": true
104
  },
@@ -107,7 +107,7 @@
107
  "content": "<|placeholder5|>",
108
  "single_word": false,
109
  "lstrip": false,
110
- "rstrip": false,
111
  "normalized": false,
112
  "special": true
113
  },
@@ -116,7 +116,7 @@
116
  "content": "<|placeholder6|>",
117
  "single_word": false,
118
  "lstrip": false,
119
- "rstrip": false,
120
  "normalized": false,
121
  "special": true
122
  },
@@ -125,7 +125,7 @@
125
  "content": "<|user|>",
126
  "single_word": false,
127
  "lstrip": false,
128
- "rstrip": false,
129
  "normalized": false,
130
  "special": true
131
  }
 
26
  "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
+ "rstrip": true,
30
  "normalized": false,
31
+ "special": false
32
  },
33
  {
34
  "id": 32000,
 
44
  "content": "<|assistant|>",
45
  "single_word": false,
46
  "lstrip": false,
47
+ "rstrip": true,
48
  "normalized": false,
49
  "special": true
50
  },
 
53
  "content": "<|placeholder1|>",
54
  "single_word": false,
55
  "lstrip": false,
56
+ "rstrip": true,
57
  "normalized": false,
58
  "special": true
59
  },
 
62
  "content": "<|placeholder2|>",
63
  "single_word": false,
64
  "lstrip": false,
65
+ "rstrip": true,
66
  "normalized": false,
67
  "special": true
68
  },
 
71
  "content": "<|placeholder3|>",
72
  "single_word": false,
73
  "lstrip": false,
74
+ "rstrip": true,
75
  "normalized": false,
76
  "special": true
77
  },
 
80
  "content": "<|placeholder4|>",
81
  "single_word": false,
82
  "lstrip": false,
83
+ "rstrip": true,
84
  "normalized": false,
85
  "special": true
86
  },
 
89
  "content": "<|system|>",
90
  "single_word": false,
91
  "lstrip": false,
92
+ "rstrip": true,
93
  "normalized": false,
94
  "special": true
95
  },
 
98
  "content": "<|end|>",
99
  "single_word": false,
100
  "lstrip": false,
101
+ "rstrip": true,
102
  "normalized": false,
103
  "special": true
104
  },
 
107
  "content": "<|placeholder5|>",
108
  "single_word": false,
109
  "lstrip": false,
110
+ "rstrip": true,
111
  "normalized": false,
112
  "special": true
113
  },
 
116
  "content": "<|placeholder6|>",
117
  "single_word": false,
118
  "lstrip": false,
119
+ "rstrip": true,
120
  "normalized": false,
121
  "special": true
122
  },
 
125
  "content": "<|user|>",
126
  "single_word": false,
127
  "lstrip": false,
128
+ "rstrip": true,
129
  "normalized": false,
130
  "special": true
131
  }
tokenizer_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
 
4
  "added_tokens_decoder": {
5
  "0": {
6
  "content": "<unk>",
@@ -22,9 +23,9 @@
22
  "content": "</s>",
23
  "lstrip": false,
24
  "normalized": false,
25
- "rstrip": false,
26
  "single_word": false,
27
- "special": true
28
  },
29
  "32000": {
30
  "content": "<|endoftext|>",
@@ -38,7 +39,7 @@
38
  "content": "<|assistant|>",
39
  "lstrip": false,
40
  "normalized": false,
41
- "rstrip": false,
42
  "single_word": false,
43
  "special": true
44
  },
@@ -46,7 +47,7 @@
46
  "content": "<|placeholder1|>",
47
  "lstrip": false,
48
  "normalized": false,
49
- "rstrip": false,
50
  "single_word": false,
51
  "special": true
52
  },
@@ -54,7 +55,7 @@
54
  "content": "<|placeholder2|>",
55
  "lstrip": false,
56
  "normalized": false,
57
- "rstrip": false,
58
  "single_word": false,
59
  "special": true
60
  },
@@ -62,7 +63,7 @@
62
  "content": "<|placeholder3|>",
63
  "lstrip": false,
64
  "normalized": false,
65
- "rstrip": false,
66
  "single_word": false,
67
  "special": true
68
  },
@@ -70,7 +71,7 @@
70
  "content": "<|placeholder4|>",
71
  "lstrip": false,
72
  "normalized": false,
73
- "rstrip": false,
74
  "single_word": false,
75
  "special": true
76
  },
@@ -78,7 +79,7 @@
78
  "content": "<|system|>",
79
  "lstrip": false,
80
  "normalized": false,
81
- "rstrip": false,
82
  "single_word": false,
83
  "special": true
84
  },
@@ -86,7 +87,7 @@
86
  "content": "<|end|>",
87
  "lstrip": false,
88
  "normalized": false,
89
- "rstrip": false,
90
  "single_word": false,
91
  "special": true
92
  },
@@ -94,7 +95,7 @@
94
  "content": "<|placeholder5|>",
95
  "lstrip": false,
96
  "normalized": false,
97
- "rstrip": false,
98
  "single_word": false,
99
  "special": true
100
  },
@@ -102,7 +103,7 @@
102
  "content": "<|placeholder6|>",
103
  "lstrip": false,
104
  "normalized": false,
105
- "rstrip": false,
106
  "single_word": false,
107
  "special": true
108
  },
@@ -110,13 +111,13 @@
110
  "content": "<|user|>",
111
  "lstrip": false,
112
  "normalized": false,
113
- "rstrip": false,
114
  "single_word": false,
115
  "special": true
116
  }
117
  },
118
  "bos_token": "<s>",
119
- "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|system|>' + '\n' + message['content'] + '<|end|>' + '\n'}}{% elif (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif message['role'] == 'assistant' %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
120
  "clean_up_tokenization_spaces": false,
121
  "eos_token": "<|endoftext|>",
122
  "legacy": false,
 
1
  {
2
  "add_bos_token": true,
3
  "add_eos_token": false,
4
+ "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
7
  "content": "<unk>",
 
23
  "content": "</s>",
24
  "lstrip": false,
25
  "normalized": false,
26
+ "rstrip": true,
27
  "single_word": false,
28
+ "special": false
29
  },
30
  "32000": {
31
  "content": "<|endoftext|>",
 
39
  "content": "<|assistant|>",
40
  "lstrip": false,
41
  "normalized": false,
42
+ "rstrip": true,
43
  "single_word": false,
44
  "special": true
45
  },
 
47
  "content": "<|placeholder1|>",
48
  "lstrip": false,
49
  "normalized": false,
50
+ "rstrip": true,
51
  "single_word": false,
52
  "special": true
53
  },
 
55
  "content": "<|placeholder2|>",
56
  "lstrip": false,
57
  "normalized": false,
58
+ "rstrip": true,
59
  "single_word": false,
60
  "special": true
61
  },
 
63
  "content": "<|placeholder3|>",
64
  "lstrip": false,
65
  "normalized": false,
66
+ "rstrip": true,
67
  "single_word": false,
68
  "special": true
69
  },
 
71
  "content": "<|placeholder4|>",
72
  "lstrip": false,
73
  "normalized": false,
74
+ "rstrip": true,
75
  "single_word": false,
76
  "special": true
77
  },
 
79
  "content": "<|system|>",
80
  "lstrip": false,
81
  "normalized": false,
82
+ "rstrip": true,
83
  "single_word": false,
84
  "special": true
85
  },
 
87
  "content": "<|end|>",
88
  "lstrip": false,
89
  "normalized": false,
90
+ "rstrip": true,
91
  "single_word": false,
92
  "special": true
93
  },
 
95
  "content": "<|placeholder5|>",
96
  "lstrip": false,
97
  "normalized": false,
98
+ "rstrip": true,
99
  "single_word": false,
100
  "special": true
101
  },
 
103
  "content": "<|placeholder6|>",
104
  "lstrip": false,
105
  "normalized": false,
106
+ "rstrip": true,
107
  "single_word": false,
108
  "special": true
109
  },
 
111
  "content": "<|user|>",
112
  "lstrip": false,
113
  "normalized": false,
114
+ "rstrip": true,
115
  "single_word": false,
116
  "special": true
117
  }
118
  },
119
  "bos_token": "<s>",
120
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,