File size: 426 Bytes
2133341
 
63325a1
 
3121bf3
2133341
 
7149eca
 
2133341
7149eca
 
 
 
2133341
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# Tokenizer configuration named `bpe_tokenizer`, declared as a preprocessor.
# NOTE(review): the schema/consumer of this file is not visible here; the
# stanza headings below only group keys by name — confirm against the loader.
name: bpe_tokenizer
config_type: preprocessor

# Truncation and padding settings.
truncation_side: right
padding_side: right
stride: 0
# NOTE(review): 0 presumably means "disabled" for this key — confirm.
pad_to_multiple_of: 0
pad_token_type_id: 0

# Special tokens. Quoted so the angle brackets are plainly literal strings.
bos_token: '<s>'
eos_token: '</s>'
unk_token: '<unk>'
sep_token: '<sep>'
pad_token: '<pad>'
cls_token: '<cls>'
mask_token: '<mask>'

# Subword markers: both are explicitly the empty string (not null).
continuing_subword_prefix: ""
end_of_word_suffix: ""
fuse_unk: false

# Vocabulary / alphabet parameters.
vocab_size: 42000
min_frequency: 2
limit_alphabet: 1000
# Explicit empty list (a bare `initial_alphabet:` would parse as null).
initial_alphabet: []
show_progress: true