kkuramitsu
committed on
Commit
•
55ce49b
1
Parent(s):
796a64d
Update tokenizer_config.json
Browse files
- tokenizer_config.json +6 -6
tokenizer_config.json
CHANGED
@@ -41,7 +41,7 @@
|
|
41 |
"special": true
|
42 |
},
|
43 |
"5": {
|
44 |
-
"content": "
|
45 |
"lstrip": false,
|
46 |
"normalized": false,
|
47 |
"rstrip": false,
|
@@ -49,7 +49,7 @@
|
|
49 |
"special": true
|
50 |
},
|
51 |
"6": {
|
52 |
-
"content": "
|
53 |
"lstrip": false,
|
54 |
"normalized": false,
|
55 |
"rstrip": false,
|
@@ -66,18 +66,18 @@
|
|
66 |
}
|
67 |
},
|
68 |
"additional_special_tokens": [
|
69 |
-
"
|
70 |
],
|
71 |
"bos_token": "<s|LLM-jp>",
|
|
|
72 |
"clean_up_tokenization_spaces": false,
|
73 |
-
"cls_token": "<CLS|LLM-jp>",
|
74 |
"eod_token": "<EOD|LLM-jp>",
|
75 |
"eos_token": "<EOD|LLM-jp>",
|
76 |
"extra_ids": 0,
|
|
|
77 |
"mask_token": "<mask|LLM-jp>",
|
78 |
"model_max_length": 1000000000000000019884624838656,
|
79 |
-
"pad_token": "<
|
80 |
-
"sep_token": "<SEP|LLM-jp>",
|
81 |
"sp_model_kwargs": {},
|
82 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
83 |
"unk_token": "<unk|LLM-jp>"
|
|
|
41 |
"special": true
|
42 |
},
|
43 |
"5": {
|
44 |
+
"content": "<|im_start|>",
|
45 |
"lstrip": false,
|
46 |
"normalized": false,
|
47 |
"rstrip": false,
|
|
|
49 |
"special": true
|
50 |
},
|
51 |
"6": {
|
52 |
+
"content": "<|im_end|>",
|
53 |
"lstrip": false,
|
54 |
"normalized": false,
|
55 |
"rstrip": false,
|
|
|
66 |
}
|
67 |
},
|
68 |
"additional_special_tokens": [
|
69 |
+
"<EOD|LLM-jp>"
|
70 |
],
|
71 |
"bos_token": "<s|LLM-jp>",
|
72 |
+
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}",
|
73 |
"clean_up_tokenization_spaces": false,
|
|
|
74 |
"eod_token": "<EOD|LLM-jp>",
|
75 |
"eos_token": "<EOD|LLM-jp>",
|
76 |
"extra_ids": 0,
|
77 |
+
"legacy": false,
|
78 |
"mask_token": "<mask|LLM-jp>",
|
79 |
"model_max_length": 1000000000000000019884624838656,
|
80 |
+
"pad_token": "<EOD|LLM-jp>",
|
|
|
81 |
"sp_model_kwargs": {},
|
82 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
83 |
"unk_token": "<unk|LLM-jp>"
|