{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 2.97260032828452, "learning_rate": 2e-05, "loss": 3.2188, "step": 1 }, { "epoch": 0.08, "grad_norm": 3.594459617966852, "learning_rate": 2e-05, "loss": 3.2031, "step": 2 }, { "epoch": 0.12, "grad_norm": 4.533624625545066, "learning_rate": 2e-05, "loss": 3.4062, "step": 3 }, { "epoch": 0.16, "grad_norm": 5.240356703299346, "learning_rate": 2e-05, "loss": 3.3906, "step": 4 }, { "epoch": 0.2, "grad_norm": 5.560506359364048, "learning_rate": 2e-05, "loss": 3.0156, "step": 5 }, { "epoch": 0.24, "grad_norm": 4.893164995962358, "learning_rate": 2e-05, "loss": 3.4844, "step": 6 }, { "epoch": 0.28, "grad_norm": 3.9500718110988577, "learning_rate": 2e-05, "loss": 3.3438, "step": 7 }, { "epoch": 0.32, "grad_norm": 3.769524663807585, "learning_rate": 2e-05, "loss": 3.2656, "step": 8 }, { "epoch": 0.36, "grad_norm": 4.641711022263081, "learning_rate": 2e-05, "loss": 3.5625, "step": 9 }, { "epoch": 0.4, "grad_norm": 5.045996826645826, "learning_rate": 2e-05, "loss": 3.3906, "step": 10 }, { "epoch": 0.44, "grad_norm": 3.9379891091293446, "learning_rate": 2e-05, "loss": 3.5, "step": 11 }, { "epoch": 0.48, "grad_norm": 5.935094879674494, "learning_rate": 2e-05, "loss": 3.0312, "step": 12 }, { "epoch": 0.52, "grad_norm": 4.966274615752918, "learning_rate": 2e-05, "loss": 3.2031, "step": 13 }, { "epoch": 0.56, "grad_norm": 4.115242373102183, "learning_rate": 2e-05, "loss": 3.1875, "step": 14 }, { "epoch": 0.6, "grad_norm": 3.8744700920947444, "learning_rate": 2e-05, "loss": 3.3906, "step": 15 }, { "epoch": 0.64, "grad_norm": 3.1817540324181763, "learning_rate": 2e-05, "loss": 3.2656, "step": 16 }, { "epoch": 0.68, "grad_norm": 3.2254117611390924, "learning_rate": 2e-05, "loss": 3.0781, "step": 17 }, { "epoch": 0.72, "grad_norm": 4.082116872038495, "learning_rate": 2e-05, "loss": 3.3906, "step": 18 }, { "epoch": 0.76, "grad_norm": 5.513160685623603, "learning_rate": 2e-05, "loss": 3.0469, "step": 19 }, { "epoch": 0.8, "grad_norm": 5.2179385595071155, "learning_rate": 2e-05, "loss": 3.2031, "step": 20 }, { "epoch": 0.84, "grad_norm": 5.4603287380267, "learning_rate": 2e-05, "loss": 3.5312, "step": 21 }, { "epoch": 0.88, "grad_norm": 4.596875242645009, "learning_rate": 2e-05, "loss": 3.0469, "step": 22 }, { "epoch": 0.92, "grad_norm": 4.546647529822756, "learning_rate": 2e-05, "loss": 3.0469, "step": 23 }, { "epoch": 0.96, "grad_norm": 5.146569623435821, "learning_rate": 2e-05, "loss": 3.2344, "step": 24 }, { "epoch": 1.0, "grad_norm": 6.727524092488977, "learning_rate": 2e-05, "loss": 3.1562, "step": 25 }, { "epoch": 1.0, "step": 25, "total_flos": 478414897152.0, "train_loss": 3.26375, "train_runtime": 93.9312, "train_samples_per_second": 2.097, "train_steps_per_second": 0.266 } ], "logging_steps": 1.0, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 478414897152.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }