---
# Training configuration for a transformer experiment.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML (and the inline comment after `model:` swallowed every
# key that followed it). The nesting below is reconstructed from the key
# names -- confirm it against the code that loads this config.

experiment_name: "runs/transformer"

dataset:
  # Source side of the dataset.
  src_lang: 'lo'
  src_tokenizer: 'BPE'
  src_max_seq_len: 400

  # Target side of the dataset.
  tgt_lang: 'vi'
  tgt_tokenizer: 'WordLevel'
  tgt_max_seq_len: 350

  train_dataset: 'train_clean.dat'
  validate_dataset: 'dev_clean.dat'

  # `{0}` looks like a positional format placeholder, presumably filled in
  # per language by the loader -- TODO confirm.
  tokenizer_file: "tokenizer_{0}.json"

model:  # 16629775 parameters
  d_model: 256
  num_heads: 8
  d_ff: 1024
  dropout_p: 0.3
  num_encoder_layers: 4
  num_decoder_layers: 2

  # NOTE(review): assumed to live under `model` because they followed the
  # model keys in the original line; verify they are not top-level keys.
  model_folder: "weights"
  model_basename: "transformer_"
  preload: "small"