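# NOTE: the next three lines are file-viewer residue (size banner, commit
# gutter, line-number gutter), kept as comments so the document parses.
# File size: 482 Bytes
# 2b2d98f ab92be3 5cdb737 2b2d98f dd73fd7 2b2d98f dd73fd7 2b2d98f |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |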
# Configuration entry of type `preprocessor` for the tokenizer named below.
# String values are quoted explicitly so no parser can re-type them.
name: 'sentencepiece_unigram_tokenizer'
config_type: 'preprocessor'

# Truncation and padding settings.
truncation_side: 'right'
padding_side: 'right'
stride: 0
# NOTE(review): 0 presumably means "no multiple-of padding" -- confirm how
# the consumer treats 0 versus an omitted/null value.
pad_to_multiple_of: 0
pad_token_type_id: 0

# Special-token strings.
bos_token: '<s>'
eos_token: '</s>'
unk_token: '<unk>'
sep_token: '<sep>'
pad_token: '<pad>'
cls_token: '<cls>'
mask_token: '<mask>'

# Sub-word / whitespace marker settings.
continuing_subword_prefix: ''
# NOTE(review): this is a plain ASCII underscore. SentencePiece-style
# tooling conventionally uses U+2581 (LOWER ONE EIGHTH BLOCK) here; verify
# the underscore is intentional and not an encoding casualty.
replacement: '_'
add_prefix_space: true
end_of_word_suffix: ''
fuse_unk: false

# Vocabulary / alphabet sizing -- presumably read when the tokenizer is
# trained; verify against the code that loads this file.
vocab_size: 32103
min_frequency: 2
limit_alphabet: 1000
initial_alphabet: []
show_progress: true
# | (stray table delimiter left by the file viewer; not part of the config)