{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.10000045053365712, "eval_steps": 500, "global_step": 22196, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.010001847187994179, "grad_norm": 8.891203880310059, "learning_rate": 9.89998152812006e-06, "loss": 3.7466, "step": 2220 }, { "epoch": 0.020003694375988357, "grad_norm": 9.095343589782715, "learning_rate": 9.799963056240118e-06, "loss": 3.8874, "step": 4440 }, { "epoch": 0.030005541563982538, "grad_norm": 10.57905387878418, "learning_rate": 9.699944584360175e-06, "loss": 3.865, "step": 6660 }, { "epoch": 0.040007388751976715, "grad_norm": 4.935700416564941, "learning_rate": 9.599926112480233e-06, "loss": 3.8441, "step": 8880 }, { "epoch": 0.0500092359399709, "grad_norm": 8.008011817932129, "learning_rate": 9.499907640600292e-06, "loss": 3.8126, "step": 11100 }, { "epoch": 0.060011083127965076, "grad_norm": 6.469420909881592, "learning_rate": 9.39988916872035e-06, "loss": 3.8078, "step": 13320 }, { "epoch": 0.07001293031595926, "grad_norm": 7.097398281097412, "learning_rate": 9.29987069684041e-06, "loss": 3.8054, "step": 15540 }, { "epoch": 0.08001477750395343, "grad_norm": 5.952394485473633, "learning_rate": 9.199852224960467e-06, "loss": 3.802, "step": 17760 }, { "epoch": 0.09001662469194761, "grad_norm": 4.3405280113220215, "learning_rate": 9.099833753080524e-06, "loss": 3.7729, "step": 19980 } ], "logging_steps": 2220, "max_steps": 221959, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 22196, "total_flos": 5202695881728000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }