tulu-2-13b / tokenizer_config.json
chujiezheng's picture
Update tokenizer_config.json
751b76f verified
raw
history blame
1.13 kB
{
"add_bos_token":true,
"add_eos_token":false,
"model_max_length":2048,
"pad_token":null,
"sp_model_kwargs":{
},
"tokenizer_class":"LlamaTokenizer",
"clean_up_tokenization_spaces":false,
"bos_token":{
"__type":"AddedToken",
"content":"<s>",
"lstrip":false,
"normalized":true,
"rstrip":false,
"single_word":false
},
"eos_token":{
"__type":"AddedToken",
"content":"</s>",
"lstrip":false,
"normalized":true,
"rstrip":false,
"single_word":false
},
"unk_token":{
"__type":"AddedToken",
"content":"<unk>",
"lstrip":false,
"normalized":true,
"rstrip":false,
"single_word":false
},
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"
}