redditbot / tokenizer_config.json
lagodw's picture
add tokenizer
1d50d0b
raw
history blame contribute delete
871 Bytes
{
  "errors": "replace",
  "unk_token": {
    "content": "<unk>",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "bos_token": {
    "content": "<|startoftext|>",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "single_word": false,
    "lstrip": false,
    "rstrip": false,
    "normalized": true,
    "__type": "AddedToken"
  },
  "add_prefix_space": false,
  "pad_token": "<|pad|>",
  "sep_token": "<sep>",
  "cls_token": "<cls>",
  "mask_token": "<mask>",
  "model_max_length": 1024,
  "special_tokens_map_file": null,
  "name_or_path": "gpt2",
  "tokenizer_class": "GPT2Tokenizer"
}