Merge pull request #21 from NanoCode012/patch-1
src/axolotl/utils/trainer.py
@@ -104,8 +104,8 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
         group_by_length=cfg.group_by_length,
         report_to="wandb" if cfg.use_wandb else None,
         run_name=cfg.wandb_run_id if cfg.use_wandb else None,
-        optim=cfg.optimizer if cfg.optimizer else
-        lr_scheduler_type=cfg.lr_scheduler if cfg.lr_scheduler not in ("one_cycle", "log_sweep") else "cosine",
+        optim=cfg.optimizer if cfg.optimizer else "adamw_hf",
+        lr_scheduler_type=cfg.lr_scheduler if cfg.lr_scheduler and cfg.lr_scheduler not in ("one_cycle", "log_sweep") else "cosine",
         weight_decay=cfg.weight_decay if cfg.weight_decay is not None else 0.0,
         **training_arguments_kwargs,
     )
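This hunk completes the dangling optim= line with an "adamw_hf" default and adds a truthiness guard so an unset lr_scheduler falls back to "cosine" rather than passing None through to TrainingArguments. A minimal sketch of the patched fallback behavior (the cfg_* variables below are hypothetical stand-ins for fields of axolotl's parsed cfg object):

# Minimal sketch of the patched fallbacks (hypothetical stand-in values;
# in axolotl these come from cfg, the parsed YAML config).
cfg_optimizer = None      # e.g. no `optimizer:` key set in the config
cfg_lr_scheduler = None   # e.g. no `lr_scheduler:` key set either

optim = cfg_optimizer if cfg_optimizer else "adamw_hf"

# Before this patch, `None not in ("one_cycle", "log_sweep")` evaluated to
# True, so lr_scheduler_type could be passed through as None; the added
# `cfg.lr_scheduler and` guard routes unset values to "cosine" instead.
lr_scheduler_type = (
    cfg_lr_scheduler
    if cfg_lr_scheduler and cfg_lr_scheduler not in ("one_cycle", "log_sweep")
    else "cosine"
)

print(optim, lr_scheduler_type)  # adamw_hf cosine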