OPEN-SOLAR-KO-10.7B / tokenizer_config.json
beomi's picture
Upload tokenizer
2621e5e
raw
history blame
10.3 kB
{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46099": {
"content": "<|sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46100": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46101": {
"content": "<|acc|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46102": {
"content": "<|rrn|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46103": {
"content": "<|tel|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"46105": {
"content": "ᄀ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46107": {
"content": "ᆪ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46108": {
"content": "ᄂ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46109": {
"content": "ᆬ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46110": {
"content": "ᆭ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46111": {
"content": "ᄃ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46112": {
"content": "ᄄ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46113": {
"content": "ᄅ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46114": {
"content": "ᆰ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46115": {
"content": "ᆱ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46116": {
"content": "ᆲ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46117": {
"content": "ᆳ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46118": {
"content": "ᆴ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46119": {
"content": "ᆵ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46120": {
"content": "ᄚ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46121": {
"content": "ᄆ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46122": {
"content": "ᄇ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46123": {
"content": "ᄈ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46124": {
"content": "ᄡ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46125": {
"content": "ᄉ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46126": {
"content": "ᄊ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46127": {
"content": "ᄋ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46128": {
"content": "ᄌ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46129": {
"content": "ᄍ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46130": {
"content": "ᄎ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46131": {
"content": "ᄏ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46132": {
"content": "ᄐ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46133": {
"content": "ᄑ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46134": {
"content": "ᄒ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46135": {
"content": "ᅡ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46136": {
"content": "ᅢ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46137": {
"content": "ᅣ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46138": {
"content": "ᅤ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46139": {
"content": "ᅥ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46140": {
"content": "ᅦ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46141": {
"content": "ᅧ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46142": {
"content": "ᅨ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46143": {
"content": "ᅩ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46144": {
"content": "ᅪ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46145": {
"content": "ᅫ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46146": {
"content": "ᅬ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46147": {
"content": "ᅭ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46148": {
"content": "ᅮ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46149": {
"content": "ᅯ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46150": {
"content": "ᅰ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46151": {
"content": "ᅱ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46152": {
"content": "ᅲ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46153": {
"content": "ᅳ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46154": {
"content": "ᅴ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"46156": {
"content": "ㄲ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"fast": true,
"legacy": true,
"model_max_length": 1000000000000000019884624838656,
"pad_token": null,
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": true
}