{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.634146341463415, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.98, "learning_rate": 4.9863047384206835e-05, "loss": 2.7294, "step": 10 }, { "epoch": 1.95, "learning_rate": 4.9453690018345144e-05, "loss": 3.5475, "step": 20 }, { "epoch": 2.93, "learning_rate": 4.9453690018345144e-05, "loss": 5.3042, "step": 30 }, { "epoch": 3.9, "learning_rate": 4.9214579028215776e-05, "loss": 6.5608, "step": 40 }, { "epoch": 4.88, "learning_rate": 4.9214579028215776e-05, "loss": 12.436, "step": 50 }, { "epoch": 5.85, "learning_rate": 4.9214579028215776e-05, "loss": 8.1905, "step": 60 }, { "epoch": 6.83, "learning_rate": 4.9214579028215776e-05, "loss": 5.0973, "step": 70 }, { "epoch": 7.8, "learning_rate": 4.9214579028215776e-05, "loss": 3.3543, "step": 80 }, { "epoch": 8.78, "learning_rate": 4.893298743830168e-05, "loss": 9.9021, "step": 90 }, { "epoch": 9.76, "learning_rate": 4.893298743830168e-05, "loss": 16.1257, "step": 100 }, { "epoch": 10.73, "learning_rate": 4.893298743830168e-05, "loss": 15.988, "step": 110 }, { "epoch": 11.71, "learning_rate": 4.877641290737884e-05, "loss": 5.743, "step": 120 }, { "epoch": 12.68, "learning_rate": 4.860940925593703e-05, "loss": 4.5123, "step": 130 }, { "epoch": 13.66, "learning_rate": 4.860940925593703e-05, "loss": 8.7191, "step": 140 }, { "epoch": 14.63, "learning_rate": 4.860940925593703e-05, "loss": 8.9813, "step": 150 }, { "epoch": 14.63, "step": 150, "total_flos": 3.550292723048448e+16, "train_loss": 7.812765197753906, "train_runtime": 716.3495, "train_samples_per_second": 3.413, "train_steps_per_second": 0.209 } ], "logging_steps": 10, "max_steps": 150, "num_train_epochs": 15, "save_steps": 1000, "total_flos": 3.550292723048448e+16, "trial_name": null, "trial_params": null }