# Identity of this configuration entry: a name plus the kind of config it is.
name: sentencepiece_unigram_tokenizer
config_type: preprocessor
# Truncation and padding settings; both sides are set to `right`.
truncation_side: right
padding_side: right
# NOTE(review): 0 presumably means "disabled" for stride and
# pad_to_multiple_of -- confirm how the consumer treats 0 here.
stride: 0
pad_to_multiple_of: 0
pad_token_type_id: 0
# Special tokens. Every entry is explicitly null (unset). A bare `key:` also
# parses as null, but spelling it out keeps it from being misread as an
# empty string.
# NOTE(review): presumably the consumer falls back to its own defaults for
# unset tokens -- confirm, in particular for unk_token and pad_token.
bos_token: null
eos_token: null
unk_token: null
sep_token: null
pad_token: null
cls_token: null
mask_token: null
# Sub-word marker and unknown-token settings.
# The prefix and suffix below are both the empty string.
continuing_subword_prefix: ''
# NOTE(review): SentencePiece conventionally marks word boundaries with
# U+2581 (LOWER ONE EIGHTH BLOCK), not an ASCII underscore -- confirm that a
# plain `_` is what the consumer expects here.
replacement: _
add_prefix_space: true
end_of_word_suffix: ''
fuse_unk: false
# Vocabulary-related limits (grouping inferred from key names -- TODO confirm
# which of these keys the consumer actually reads).
vocab_size: 32103
min_frequency: 2
limit_alphabet: 1000
# Empty list: no entries are listed in initial_alphabet.
initial_alphabet: []
show_progress: true