{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.25296442687747034, "eval_steps": 16, "global_step": 16, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015810276679841896, "grad_norm": 3.106086254119873, "learning_rate": 2.0000000000000003e-06, "loss": 0.2762, "step": 1 }, { "epoch": 0.015810276679841896, "eval_loss": 0.34248441457748413, "eval_runtime": 9.4978, "eval_samples_per_second": 11.266, "eval_steps_per_second": 2.843, "step": 1 }, { "epoch": 0.03162055335968379, "grad_norm": 0.6217677593231201, "learning_rate": 4.000000000000001e-06, "loss": 0.2623, "step": 2 }, { "epoch": 0.04743083003952569, "grad_norm": 1.1419365406036377, "learning_rate": 6e-06, "loss": 0.3943, "step": 3 }, { "epoch": 0.06324110671936758, "grad_norm": 0.8881447315216064, "learning_rate": 8.000000000000001e-06, "loss": 0.1572, "step": 4 }, { "epoch": 0.07905138339920949, "grad_norm": 0.7020868062973022, "learning_rate": 1e-05, "loss": 0.1578, "step": 5 }, { "epoch": 0.09486166007905138, "grad_norm": 1.2612448930740356, "learning_rate": 1.2e-05, "loss": 0.2245, "step": 6 }, { "epoch": 0.11067193675889328, "grad_norm": 1.2954490184783936, "learning_rate": 1.4000000000000001e-05, "loss": 0.2101, "step": 7 }, { "epoch": 0.12648221343873517, "grad_norm": 0.8434045314788818, "learning_rate": 1.6000000000000003e-05, "loss": 0.1529, "step": 8 }, { "epoch": 0.1422924901185771, "grad_norm": 0.9214808940887451, "learning_rate": 1.8e-05, "loss": 0.1522, "step": 9 }, { "epoch": 0.15810276679841898, "grad_norm": 1.3638683557510376, "learning_rate": 2e-05, "loss": 0.3282, "step": 10 }, { "epoch": 0.17391304347826086, "grad_norm": 2.2487895488739014, "learning_rate": 2.2000000000000003e-05, "loss": 0.472, "step": 11 }, { "epoch": 0.18972332015810275, "grad_norm": 1.98398756980896, "learning_rate": 2.4e-05, "loss": 0.4099, "step": 12 }, { "epoch": 0.20553359683794467, "grad_norm": 1.796846866607666, "learning_rate": 2.6000000000000002e-05, "loss": 0.3184, "step": 13 }, { "epoch": 0.22134387351778656, "grad_norm": 1.6363037824630737, "learning_rate": 2.8000000000000003e-05, "loss": 0.3551, "step": 14 }, { "epoch": 0.23715415019762845, "grad_norm": 1.927720308303833, "learning_rate": 3e-05, "loss": 0.405, "step": 15 }, { "epoch": 0.25296442687747034, "grad_norm": 1.0266072750091553, "learning_rate": 3.2000000000000005e-05, "loss": 0.2991, "step": 16 }, { "epoch": 0.25296442687747034, "eval_loss": 0.21815715730190277, "eval_runtime": 8.042, "eval_samples_per_second": 13.305, "eval_steps_per_second": 3.357, "step": 16 } ], "logging_steps": 1, "max_steps": 63, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 16, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2383975687389184.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }