{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22727272727272727, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.022727272727272728, "grad_norm": 6.847477436065674, "learning_rate": 2e-05, "loss": 3.638, "step": 1 }, { "epoch": 0.022727272727272728, "eval_loss": 3.789806842803955, "eval_runtime": 1.2259, "eval_samples_per_second": 60.364, "eval_steps_per_second": 8.157, "step": 1 }, { "epoch": 0.045454545454545456, "grad_norm": 5.260680198669434, "learning_rate": 4e-05, "loss": 3.4007, "step": 2 }, { "epoch": 0.06818181818181818, "grad_norm": 6.062076091766357, "learning_rate": 6e-05, "loss": 3.5331, "step": 3 }, { "epoch": 0.06818181818181818, "eval_loss": 3.6856677532196045, "eval_runtime": 1.219, "eval_samples_per_second": 60.704, "eval_steps_per_second": 8.203, "step": 3 }, { "epoch": 0.09090909090909091, "grad_norm": 6.924163341522217, "learning_rate": 8e-05, "loss": 3.2663, "step": 4 }, { "epoch": 0.11363636363636363, "grad_norm": 5.1875200271606445, "learning_rate": 0.0001, "loss": 3.6316, "step": 5 }, { "epoch": 0.13636363636363635, "grad_norm": 5.6750168800354, "learning_rate": 0.00012, "loss": 2.8824, "step": 6 }, { "epoch": 0.13636363636363635, "eval_loss": 2.815485715866089, "eval_runtime": 1.2233, "eval_samples_per_second": 60.492, "eval_steps_per_second": 8.175, "step": 6 }, { "epoch": 0.1590909090909091, "grad_norm": 4.6825079917907715, "learning_rate": 0.00014, "loss": 2.9952, "step": 7 }, { "epoch": 0.18181818181818182, "grad_norm": 4.181937217712402, "learning_rate": 0.00016, "loss": 2.3997, "step": 8 }, { "epoch": 0.20454545454545456, "grad_norm": 3.558675765991211, "learning_rate": 0.00018, "loss": 1.8518, "step": 9 }, { "epoch": 0.20454545454545456, "eval_loss": 1.6439586877822876, "eval_runtime": 1.2232, "eval_samples_per_second": 60.497, "eval_steps_per_second": 8.175, "step": 9 }, { "epoch": 0.22727272727272727, "grad_norm": 3.013317108154297, "learning_rate": 0.0002, "loss": 1.6307, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8234950176276480.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }