{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5030425963488844, "eval_steps": 31, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008113590263691683, "eval_loss": 5.708795547485352, "eval_runtime": 34.9917, "eval_samples_per_second": 5.944, "eval_steps_per_second": 0.743, "step": 1 }, { "epoch": 0.02434077079107505, "grad_norm": 27.649152755737305, "learning_rate": 3e-05, "loss": 22.4374, "step": 3 }, { "epoch": 0.0486815415821501, "grad_norm": 24.12411880493164, "learning_rate": 6e-05, "loss": 20.3477, "step": 6 }, { "epoch": 0.07302231237322515, "grad_norm": 22.957651138305664, "learning_rate": 9e-05, "loss": 13.9237, "step": 9 }, { "epoch": 0.0973630831643002, "grad_norm": 22.14281463623047, "learning_rate": 9.999238475781957e-05, "loss": 8.2192, "step": 12 }, { "epoch": 0.12170385395537525, "grad_norm": 15.811911582946777, "learning_rate": 9.99524110790929e-05, "loss": 5.4619, "step": 15 }, { "epoch": 0.1460446247464503, "grad_norm": 17.17688751220703, "learning_rate": 9.987820251299122e-05, "loss": 4.1998, "step": 18 }, { "epoch": 0.17038539553752535, "grad_norm": 9.06302547454834, "learning_rate": 9.976980991835894e-05, "loss": 3.5102, "step": 21 }, { "epoch": 0.1947261663286004, "grad_norm": 9.071863174438477, "learning_rate": 9.962730758206611e-05, "loss": 2.9501, "step": 24 }, { "epoch": 0.21906693711967545, "grad_norm": 10.715943336486816, "learning_rate": 9.945079316809585e-05, "loss": 2.8262, "step": 27 }, { "epoch": 0.2434077079107505, "grad_norm": 6.811004161834717, "learning_rate": 9.924038765061042e-05, "loss": 2.6217, "step": 30 }, { "epoch": 0.2515212981744422, "eval_loss": 0.6682190299034119, "eval_runtime": 35.2448, "eval_samples_per_second": 5.902, "eval_steps_per_second": 0.738, "step": 31 }, { "epoch": 0.26774847870182555, "grad_norm": 5.427604675292969, "learning_rate": 9.899623523104149e-05, "loss": 2.4006, "step": 33 }, { "epoch": 0.2920892494929006, "grad_norm": 7.537416458129883, "learning_rate": 9.871850323926177e-05, "loss": 2.7397, "step": 36 }, { "epoch": 0.31643002028397565, "grad_norm": 8.674805641174316, "learning_rate": 9.84073820189054e-05, "loss": 2.4828, "step": 39 }, { "epoch": 0.3407707910750507, "grad_norm": 8.167730331420898, "learning_rate": 9.806308479691595e-05, "loss": 2.5302, "step": 42 }, { "epoch": 0.36511156186612576, "grad_norm": 10.937932014465332, "learning_rate": 9.768584753741134e-05, "loss": 2.3383, "step": 45 }, { "epoch": 0.3894523326572008, "grad_norm": 3.8246636390686035, "learning_rate": 9.727592877996585e-05, "loss": 2.212, "step": 48 }, { "epoch": 0.41379310344827586, "grad_norm": 4.667800426483154, "learning_rate": 9.683360946241989e-05, "loss": 2.3763, "step": 51 }, { "epoch": 0.4381338742393509, "grad_norm": 4.990477561950684, "learning_rate": 9.635919272833938e-05, "loss": 2.3995, "step": 54 }, { "epoch": 0.46247464503042596, "grad_norm": 9.09072494506836, "learning_rate": 9.58530037192562e-05, "loss": 2.545, "step": 57 }, { "epoch": 0.486815415821501, "grad_norm": 5.338695049285889, "learning_rate": 9.53153893518325e-05, "loss": 2.3183, "step": 60 }, { "epoch": 0.5030425963488844, "eval_loss": 0.5645254850387573, "eval_runtime": 13.4155, "eval_samples_per_second": 15.504, "eval_steps_per_second": 1.938, "step": 62 } ], "logging_steps": 3, "max_steps": 370, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 31, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.249073800419738e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }