---
# Experiment configuration for a Lao (lo) → Vietnamese (vi) Transformer
# translation run. Three sections: dataset (corpora + tokenization limits),
# model (architecture hyperparameters), train (optimization schedule).
experiment_name: 'runs/transformer_big'

dataset:
  src_lang: 'lo'                 # source language code
  src_tokenizer: 'BPE'           # source-side tokenizer type
  src_max_seq_len: 400           # max source tokens per example
  tgt_lang: 'vi'                 # target language code
  tgt_tokenizer: 'WordLevel'     # target-side tokenizer type
  tgt_max_seq_len: 350           # max target tokens per example
  train_dataset: 'train_clean.dat'
  validate_dataset: 'dev_clean.dat'
  # "{0}" is presumably substituted with a language code at load time —
  # TODO(review): confirm against the tokenizer-loading code.
  tokenizer_file: 'tokenizer_{0}.json'
  bleu_dataset: 'test2023'       # split used for BLEU evaluation

model:
  d_model: 512                   # embedding / hidden dimension
  num_heads: 8                   # attention heads per layer
  d_ff: 2048                     # feed-forward inner dimension
  dropout_p: 0.3
  num_encoder_layers: 4
  num_decoder_layers: 2
  model_folder: 'weights'        # checkpoint directory
  model_basename: 'transformer_' # checkpoint filename prefix
  # NOTE(review): looks like a checkpoint tag/suffix to resume from — confirm
  # how the loader combines this with model_basename.
  preload: 'big'

train:
  lr: 0.0001                     # peak learning rate
  batch_size: 16
  num_epochs: 40
  label_smoothing: 0.1
  on_colab: true                 # canonical lowercase boolean (was "True")
  patience: 100                  # early-stopping patience (epochs or steps — verify)
  warm_up_steps: 700             # LR warm-up steps