{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0024285148114106467, "eval_steps": 9, "global_step": 72, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 3.372937238070342e-05, "eval_loss": 0.9200180768966675, "eval_runtime": 1651.9865, "eval_samples_per_second": 30.226, "eval_steps_per_second": 3.778, "step": 1 }, { "epoch": 0.00010118811714211028, "grad_norm": 0.370296448469162, "learning_rate": 1.5e-05, "loss": 0.9664, "step": 3 }, { "epoch": 0.00020237623428422057, "grad_norm": 0.3390261232852936, "learning_rate": 3e-05, "loss": 0.9103, "step": 6 }, { "epoch": 0.00030356435142633083, "grad_norm": 0.2934589087963104, "learning_rate": 4.5e-05, "loss": 0.8309, "step": 9 }, { "epoch": 0.00030356435142633083, "eval_loss": 0.9088784456253052, "eval_runtime": 1657.5975, "eval_samples_per_second": 30.124, "eval_steps_per_second": 3.766, "step": 9 }, { "epoch": 0.00040475246856844113, "grad_norm": 0.3271854519844055, "learning_rate": 4.993910125649561e-05, "loss": 0.8315, "step": 12 }, { "epoch": 0.0005059405857105514, "grad_norm": 0.27277594804763794, "learning_rate": 4.962019382530521e-05, "loss": 0.8816, "step": 15 }, { "epoch": 0.0006071287028526617, "grad_norm": 0.29398417472839355, "learning_rate": 4.9031542398457974e-05, "loss": 0.8761, "step": 18 }, { "epoch": 0.0006071287028526617, "eval_loss": 0.8483078479766846, "eval_runtime": 1658.0801, "eval_samples_per_second": 30.115, "eval_steps_per_second": 3.765, "step": 18 }, { "epoch": 0.0007083168199947719, "grad_norm": 0.2874149978160858, "learning_rate": 4.817959636416969e-05, "loss": 0.8283, "step": 21 }, { "epoch": 0.0008095049371368823, "grad_norm": 0.2777639329433441, "learning_rate": 4.707368982147318e-05, "loss": 0.7365, "step": 24 }, { "epoch": 0.0009106930542789925, "grad_norm": 0.2506990432739258, "learning_rate": 4.572593931387604e-05, "loss": 0.8006, "step": 27 }, { "epoch": 0.0009106930542789925, "eval_loss": 0.830035924911499, "eval_runtime": 1657.6879, "eval_samples_per_second": 30.122, "eval_steps_per_second": 3.765, "step": 27 }, { "epoch": 0.0010118811714211029, "grad_norm": 0.26121965050697327, "learning_rate": 4.415111107797445e-05, "loss": 0.7908, "step": 30 }, { "epoch": 0.001113069288563213, "grad_norm": 0.22511965036392212, "learning_rate": 4.2366459261474933e-05, "loss": 0.6791, "step": 33 }, { "epoch": 0.0012142574057053233, "grad_norm": 0.2881379723548889, "learning_rate": 4.039153688314145e-05, "loss": 0.8729, "step": 36 }, { "epoch": 0.0012142574057053233, "eval_loss": 0.8234006762504578, "eval_runtime": 1657.9322, "eval_samples_per_second": 30.118, "eval_steps_per_second": 3.765, "step": 36 }, { "epoch": 0.0013154455228474337, "grad_norm": 0.2760739028453827, "learning_rate": 3.824798160583012e-05, "loss": 0.7797, "step": 39 }, { "epoch": 0.0014166336399895438, "grad_norm": 0.2843494117259979, "learning_rate": 3.5959278669726935e-05, "loss": 0.7533, "step": 42 }, { "epoch": 0.0015178217571316542, "grad_norm": 0.23762422800064087, "learning_rate": 3.355050358314172e-05, "loss": 0.7663, "step": 45 }, { "epoch": 0.0015178217571316542, "eval_loss": 0.8193370699882507, "eval_runtime": 1657.7176, "eval_samples_per_second": 30.122, "eval_steps_per_second": 3.765, "step": 45 }, { "epoch": 0.0016190098742737645, "grad_norm": 0.3037606179714203, "learning_rate": 3.104804738999169e-05, "loss": 0.7856, "step": 48 }, { "epoch": 0.0017201979914158747, "grad_norm": 0.2485446184873581, "learning_rate": 2.8479327524001636e-05, "loss": 0.7938, "step": 51 }, { "epoch": 0.001821386108557985, "grad_norm": 0.24273481965065002, "learning_rate": 2.587248741756253e-05, "loss": 0.7606, "step": 54 }, { "epoch": 0.001821386108557985, "eval_loss": 0.8170143365859985, "eval_runtime": 1656.2446, "eval_samples_per_second": 30.148, "eval_steps_per_second": 3.769, "step": 54 }, { "epoch": 0.0019225742257000954, "grad_norm": 0.2631082534790039, "learning_rate": 2.3256088156396868e-05, "loss": 0.7554, "step": 57 }, { "epoch": 0.0020237623428422057, "grad_norm": 0.2768804430961609, "learning_rate": 2.0658795558326743e-05, "loss": 0.8178, "step": 60 }, { "epoch": 0.002124950459984316, "grad_norm": 0.23129825294017792, "learning_rate": 1.8109066104575023e-05, "loss": 0.8145, "step": 63 }, { "epoch": 0.002124950459984316, "eval_loss": 0.8158968091011047, "eval_runtime": 1657.0323, "eval_samples_per_second": 30.134, "eval_steps_per_second": 3.767, "step": 63 }, { "epoch": 0.002226138577126426, "grad_norm": 0.28922489285469055, "learning_rate": 1.56348351646022e-05, "loss": 0.7805, "step": 66 }, { "epoch": 0.0023273266942685365, "grad_norm": 0.30259978771209717, "learning_rate": 1.3263210930352737e-05, "loss": 0.8183, "step": 69 }, { "epoch": 0.0024285148114106467, "grad_norm": 0.2592143714427948, "learning_rate": 1.1020177413231334e-05, "loss": 0.8365, "step": 72 }, { "epoch": 0.0024285148114106467, "eval_loss": 0.8151922225952148, "eval_runtime": 1656.7975, "eval_samples_per_second": 30.138, "eval_steps_per_second": 3.768, "step": 72 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.2337197109673984e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }