{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 164, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06097560975609756, "grad_norm": 2.8818812370300293, "learning_rate": 2.2778456926584984e-06, "loss": 0.7153, "step": 10 }, { "epoch": 0.12195121951219512, "grad_norm": 1.8926554918289185, "learning_rate": 2.129933634693661e-06, "loss": 0.67, "step": 20 }, { "epoch": 0.18292682926829268, "grad_norm": 2.1769344806671143, "learning_rate": 1.9820215767288233e-06, "loss": 0.638, "step": 30 }, { "epoch": 0.24390243902439024, "grad_norm": 1.9136947393417358, "learning_rate": 1.834109518763986e-06, "loss": 0.6009, "step": 40 }, { "epoch": 0.3048780487804878, "grad_norm": 1.2495990991592407, "learning_rate": 1.6861974607991482e-06, "loss": 0.5909, "step": 50 }, { "epoch": 0.36585365853658536, "grad_norm": 1.0768756866455078, "learning_rate": 1.5382854028343109e-06, "loss": 0.5695, "step": 60 }, { "epoch": 0.4268292682926829, "grad_norm": 1.384157657623291, "learning_rate": 1.3903733448694731e-06, "loss": 0.5484, "step": 70 }, { "epoch": 0.4878048780487805, "grad_norm": 1.2738723754882812, "learning_rate": 1.2424612869046356e-06, "loss": 0.5688, "step": 80 }, { "epoch": 0.5487804878048781, "grad_norm": 1.511283278465271, "learning_rate": 1.094549228939798e-06, "loss": 0.4933, "step": 90 }, { "epoch": 0.6097560975609756, "grad_norm": 1.630743384361267, "learning_rate": 9.466371709749605e-07, "loss": 0.5173, "step": 100 }, { "epoch": 0.6707317073170732, "grad_norm": 1.6964328289031982, "learning_rate": 7.987251130101229e-07, "loss": 0.5205, "step": 110 }, { "epoch": 0.7317073170731707, "grad_norm": 1.619503140449524, "learning_rate": 6.508130550452854e-07, "loss": 0.5227, "step": 120 }, { "epoch": 0.7926829268292683, "grad_norm": 0.9739822149276733, "learning_rate": 5.029009970804477e-07, "loss": 0.4964, "step": 130 }, { "epoch": 0.8536585365853658, "grad_norm": 1.1013520956039429, "learning_rate": 3.5498893911561014e-07, "loss": 0.4978, "step": 140 }, { "epoch": 0.9146341463414634, "grad_norm": 1.492324948310852, "learning_rate": 2.0707688115077262e-07, "loss": 0.5045, "step": 150 }, { "epoch": 0.975609756097561, "grad_norm": 1.4458919763565063, "learning_rate": 5.916482318593503e-08, "loss": 0.4941, "step": 160 } ], "logging_steps": 10, "max_steps": 164, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 692539560173568.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "learning_rate": 2.425757750623336e-06, "num_train_epochs": 1, "per_device_train_batch_size": 32, "seed": 7 } }