# Run configuration: experiment-level settings followed by one mapping per
# component (model, data, optim, eval, checkpoint, logging).
# NOTE(review): the nesting below was reconstructed from key order. The keys
# `name` and `every_steps` occur more than once, so they can only coexist in
# separate mappings -- confirm the grouping against the consumer's schema.

# Experiment-level settings.
mode: ft
device: gpu
precision: bf16
eval_only: false
predict_only: false
seed: 23

model:
  klass: hf_t5
  compile: true
  name: google/t5-efficient-mini-nl24
  random_init: false
  # Explicit empty string (not null).
  checkpoint_path: ''

data:
  dataset: flan
  max_seq_len: 1024
  max_target_len: 128
  num_workers: 8
  n_eval_examples: 500
  # NOTE(review): presumably these three paths belong under `data`; all of
  # them are set to `.` as written -- verify.
  exec_file_path: .
  data_dir: .
  task_dir: .

optim:
  name: adamw
  base_lr: 5.0e-05
  # NOTE(review): presumably only consulted by a cosine schedule, while
  # `lr_scheduler` below is `constant` -- confirm whether this is used.
  final_cosine: 1.0e-06
  lr_scheduler: constant
  # NOTE(review): -1 looks like a "disabled" sentinel that leaves
  # `total_steps` in charge -- confirm with the training loop.
  epochs: -1
  batch_size: 64
  grad_acc: 8
  weight_decay: 0.001
  grad_clip: 1.0
  total_steps: 25000
  warmup_steps: 2000

eval:
  steps: 500
  every_steps: 4000

checkpoint:
  every_steps: 5000

logging:
  neptune: false
  neptune_creds:
    project: null
    api_token: null
    # NOTE(review): `tags` directly follows `api_token` in the original key
    # order; assumed to be part of `neptune_creds` -- verify.
    tags: ''
  every_steps: 50
  grad_l2: true
  weights_l2: true