{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5, "eval_steps": 3, "global_step": 57, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008771929824561403, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.0802, "step": 1 }, { "epoch": 0.02631578947368421, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.0494, "step": 3 }, { "epoch": 0.05263157894736842, "grad_norm": 2.4723047601512564, "learning_rate": 6.000000000000001e-07, "loss": 2.0771, "step": 6 }, { "epoch": 0.07894736842105263, "grad_norm": 2.106898891910819, "learning_rate": 1.8e-06, "loss": 2.0245, "step": 9 }, { "epoch": 0.10526315789473684, "grad_norm": 0.8386924455230856, "learning_rate": 2.9432692307692307e-06, "loss": 2.0568, "step": 12 }, { "epoch": 0.13157894736842105, "grad_norm": 1.1653346026055935, "learning_rate": 2.7730769230769233e-06, "loss": 2.0326, "step": 15 }, { "epoch": 0.15789473684210525, "grad_norm": 1.5574035133942843, "learning_rate": 2.6028846153846155e-06, "loss": 2.0396, "step": 18 }, { "epoch": 0.18421052631578946, "grad_norm": 0.9621595083981531, "learning_rate": 2.4326923076923077e-06, "loss": 2.0336, "step": 21 }, { "epoch": 0.21052631578947367, "grad_norm": 0.9955527535333841, "learning_rate": 2.2625e-06, "loss": 1.9571, "step": 24 }, { "epoch": 0.23684210526315788, "grad_norm": 0.7832834940985813, "learning_rate": 2.092307692307692e-06, "loss": 1.9792, "step": 27 }, { "epoch": 0.2631578947368421, "grad_norm": 0.7516778470264893, "learning_rate": 1.9221153846153848e-06, "loss": 2.015, "step": 30 }, { "epoch": 0.2894736842105263, "grad_norm": 1.4307615046721256, "learning_rate": 1.7519230769230768e-06, "loss": 1.9845, "step": 33 }, { "epoch": 0.3157894736842105, "grad_norm": 0.7110172377205767, "learning_rate": 1.581730769230769e-06, "loss": 1.974, "step": 36 }, { "epoch": 0.34210526315789475, "grad_norm": 0.9472807779995442, "learning_rate": 1.4115384615384616e-06, "loss": 1.9848, "step": 39 }, { "epoch": 0.3684210526315789, "grad_norm": 0.6928271721519345, "learning_rate": 1.2413461538461538e-06, "loss": 1.9453, "step": 42 }, { "epoch": 0.39473684210526316, "grad_norm": 0.9136097540650397, "learning_rate": 1.071153846153846e-06, "loss": 1.9987, "step": 45 }, { "epoch": 0.42105263157894735, "grad_norm": 0.6671789202988747, "learning_rate": 9.009615384615385e-07, "loss": 2.0054, "step": 48 }, { "epoch": 0.4473684210526316, "grad_norm": 1.2888605208856772, "learning_rate": 7.307692307692307e-07, "loss": 1.9706, "step": 51 }, { "epoch": 0.47368421052631576, "grad_norm": 0.9597150120115726, "learning_rate": 6.740384615384617e-07, "loss": 1.9874, "step": 54 }, { "epoch": 0.5, "grad_norm": 0.7428542672176522, "learning_rate": 5.038461538461539e-07, "loss": 2.0058, "step": 57 }, { "epoch": 0.5, "step": 57, "total_flos": 227154377834496.0, "train_loss": 2.006924344782244, "train_runtime": 17818.5287, "train_samples_per_second": 0.409, "train_steps_per_second": 0.003 } ], "logging_steps": 3, "max_steps": 57, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 12, "total_flos": 227154377834496.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }