# One setting per line: a plain YAML value may not contain ": ", so several
# "key: value" pairs on the same line fail to parse.
# NOTE(review): the consumer of these keys is not visible here; the meanings
# below are inferred from the key names and should be confirmed.

# Presumably the upper bound on the number of vocabulary entries.
max_vocab_size: 320000

# Presumably the number of documents to read.
num_docs: 1000000

# Single-quoted so the "|" and ">" characters are always literal string content.
eos_token: '<|endoftext|>'