import pytest | |
from megatron.tokenizer import train_tokenizer | |
def test_train_tokenizer():
    """Smoke-test the tokenizer training entry point via its CLI parser.

    Builds a command-line style argument list, parses it with
    ``train_tokenizer.parse_args`` and hands the result to
    ``train_tokenizer.main``. The test contains no assertions: it passes
    as long as neither call raises.
    """
    # Relative path -- presumably resolved against the directory pytest is
    # launched from (the repository root); confirm if the test is moved.
    sample_corpus = "./tests/data/enwik8_first100.txt"
    # The output path is intentionally passed as an empty string.
    # NOTE(review): how train_tokenizer treats "" is not visible here.
    output_path = ""

    parsed = train_tokenizer.parse_args(
        [
            "--json_input_dir",
            sample_corpus,
            "--tokenizer_output_path",
            output_path,
        ]
    )
    train_tokenizer.main(parsed)