{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.3246753246753247, "eval_steps": 4, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.012987012987012988, "grad_norm": 1.7707418203353882, "learning_rate": 1.0000000000000002e-06, "loss": 3.9226, "step": 1 }, { "epoch": 0.012987012987012988, "eval_loss": 3.3614439964294434, "eval_runtime": 2.1416, "eval_samples_per_second": 15.409, "eval_steps_per_second": 4.202, "step": 1 }, { "epoch": 0.025974025974025976, "grad_norm": 1.3376400470733643, "learning_rate": 2.0000000000000003e-06, "loss": 3.7688, "step": 2 }, { "epoch": 0.03896103896103896, "grad_norm": 0.8218762874603271, "learning_rate": 3e-06, "loss": 2.3745, "step": 3 }, { "epoch": 0.05194805194805195, "grad_norm": 1.0264558792114258, "learning_rate": 4.000000000000001e-06, "loss": 3.6438, "step": 4 }, { "epoch": 0.05194805194805195, "eval_loss": 3.3627524375915527, "eval_runtime": 1.1019, "eval_samples_per_second": 29.948, "eval_steps_per_second": 8.168, "step": 4 }, { "epoch": 0.06493506493506493, "grad_norm": 1.4011552333831787, "learning_rate": 5e-06, "loss": 3.3352, "step": 5 }, { "epoch": 0.07792207792207792, "grad_norm": 0.9959847331047058, "learning_rate": 6e-06, "loss": 2.8443, "step": 6 }, { "epoch": 0.09090909090909091, "grad_norm": 1.132798194885254, "learning_rate": 7e-06, "loss": 2.8373, "step": 7 }, { "epoch": 0.1038961038961039, "grad_norm": 0.8775717616081238, "learning_rate": 8.000000000000001e-06, "loss": 3.1148, "step": 8 }, { "epoch": 0.1038961038961039, "eval_loss": 3.360588788986206, "eval_runtime": 1.1041, "eval_samples_per_second": 29.888, "eval_steps_per_second": 8.151, "step": 8 }, { "epoch": 0.11688311688311688, "grad_norm": 1.255873441696167, "learning_rate": 9e-06, "loss": 3.6851, "step": 9 }, { "epoch": 0.12987012987012986, "grad_norm": 1.1891772747039795, "learning_rate": 1e-05, "loss": 3.358, "step": 10 }, { "epoch": 0.14285714285714285, "grad_norm": 0.936803936958313, "learning_rate": 9.890738003669029e-06, "loss": 2.6444, "step": 11 }, { "epoch": 0.15584415584415584, "grad_norm": 0.9471662044525146, "learning_rate": 9.567727288213005e-06, "loss": 2.7137, "step": 12 }, { "epoch": 0.15584415584415584, "eval_loss": 3.3596856594085693, "eval_runtime": 1.1064, "eval_samples_per_second": 29.827, "eval_steps_per_second": 8.135, "step": 12 }, { "epoch": 0.16883116883116883, "grad_norm": 1.4470901489257812, "learning_rate": 9.045084971874738e-06, "loss": 3.2627, "step": 13 }, { "epoch": 0.18181818181818182, "grad_norm": 1.4950265884399414, "learning_rate": 8.345653031794292e-06, "loss": 3.6887, "step": 14 }, { "epoch": 0.19480519480519481, "grad_norm": 1.1622928380966187, "learning_rate": 7.500000000000001e-06, "loss": 3.0483, "step": 15 }, { "epoch": 0.2077922077922078, "grad_norm": 1.2167164087295532, "learning_rate": 6.545084971874738e-06, "loss": 3.3572, "step": 16 }, { "epoch": 0.2077922077922078, "eval_loss": 3.3558952808380127, "eval_runtime": 1.1045, "eval_samples_per_second": 29.877, "eval_steps_per_second": 8.148, "step": 16 }, { "epoch": 0.22077922077922077, "grad_norm": 0.882570743560791, "learning_rate": 5.522642316338268e-06, "loss": 2.9154, "step": 17 }, { "epoch": 0.23376623376623376, "grad_norm": 1.3990870714187622, "learning_rate": 4.477357683661734e-06, "loss": 2.6733, "step": 18 }, { "epoch": 0.24675324675324675, "grad_norm": 0.8659160137176514, "learning_rate": 3.4549150281252635e-06, "loss": 2.9225, "step": 19 }, { "epoch": 0.2597402597402597, "grad_norm": 1.0206493139266968, "learning_rate": 2.5000000000000015e-06, "loss": 2.9704, "step": 20 }, { "epoch": 0.2597402597402597, "eval_loss": 3.353020668029785, "eval_runtime": 1.1115, "eval_samples_per_second": 29.689, "eval_steps_per_second": 8.097, "step": 20 }, { "epoch": 0.2727272727272727, "grad_norm": 1.4770469665527344, "learning_rate": 1.6543469682057105e-06, "loss": 3.2062, "step": 21 }, { "epoch": 0.2857142857142857, "grad_norm": 1.4567232131958008, "learning_rate": 9.549150281252633e-07, "loss": 3.6146, "step": 22 }, { "epoch": 0.2987012987012987, "grad_norm": 1.4736906290054321, "learning_rate": 4.322727117869951e-07, "loss": 3.479, "step": 23 }, { "epoch": 0.3116883116883117, "grad_norm": 1.1980702877044678, "learning_rate": 1.0926199633097156e-07, "loss": 2.5612, "step": 24 }, { "epoch": 0.3116883116883117, "eval_loss": 3.353377342224121, "eval_runtime": 1.124, "eval_samples_per_second": 29.36, "eval_steps_per_second": 8.007, "step": 24 }, { "epoch": 0.3246753246753247, "grad_norm": 1.2400470972061157, "learning_rate": 0.0, "loss": 3.1659, "step": 25 } ], "logging_steps": 1, "max_steps": 25, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1286710965043200.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }