"tokenizer_class": "BertTokenizer", "special_tokens": { "0": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "1": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "2": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "3": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "4": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true } }, "clean_up_tokenization_spaces": true, "cls_token": "[CLS]", "do_basic_tokenize": true, "do_lower_case": false, "mask_token": "[MASK]", "max_len": 512, "model_max_length": 512, "pad_token": "[PAD]", "sep_token": "[SEP]", "tokenize_chinese_chars": true, "unk_token": "[UNK]" }