|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.0033729372380703424,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 3.372937238070342e-05,
      "eval_loss": 0.9200180768966675,
      "eval_runtime": 1651.9865,
      "eval_samples_per_second": 30.226,
      "eval_steps_per_second": 3.778,
      "step": 1
    },
    {
      "epoch": 0.00010118811714211028,
      "grad_norm": 0.370296448469162,
      "learning_rate": 1.5e-05,
      "loss": 0.9664,
      "step": 3
    },
    {
      "epoch": 0.00020237623428422057,
      "grad_norm": 0.3390261232852936,
      "learning_rate": 3e-05,
      "loss": 0.9103,
      "step": 6
    },
    {
      "epoch": 0.00030356435142633083,
      "grad_norm": 0.2934589087963104,
      "learning_rate": 4.5e-05,
      "loss": 0.8309,
      "step": 9
    },
    {
      "epoch": 0.00030356435142633083,
      "eval_loss": 0.9088784456253052,
      "eval_runtime": 1657.5975,
      "eval_samples_per_second": 30.124,
      "eval_steps_per_second": 3.766,
      "step": 9
    },
    {
      "epoch": 0.00040475246856844113,
      "grad_norm": 0.3271854519844055,
      "learning_rate": 4.993910125649561e-05,
      "loss": 0.8315,
      "step": 12
    },
    {
      "epoch": 0.0005059405857105514,
      "grad_norm": 0.27277594804763794,
      "learning_rate": 4.962019382530521e-05,
      "loss": 0.8816,
      "step": 15
    },
    {
      "epoch": 0.0006071287028526617,
      "grad_norm": 0.29398417472839355,
      "learning_rate": 4.9031542398457974e-05,
      "loss": 0.8761,
      "step": 18
    },
    {
      "epoch": 0.0006071287028526617,
      "eval_loss": 0.8483078479766846,
      "eval_runtime": 1658.0801,
      "eval_samples_per_second": 30.115,
      "eval_steps_per_second": 3.765,
      "step": 18
    },
    {
      "epoch": 0.0007083168199947719,
      "grad_norm": 0.2874149978160858,
      "learning_rate": 4.817959636416969e-05,
      "loss": 0.8283,
      "step": 21
    },
    {
      "epoch": 0.0008095049371368823,
      "grad_norm": 0.2777639329433441,
      "learning_rate": 4.707368982147318e-05,
      "loss": 0.7365,
      "step": 24
    },
    {
      "epoch": 0.0009106930542789925,
      "grad_norm": 0.2506990432739258,
      "learning_rate": 4.572593931387604e-05,
      "loss": 0.8006,
      "step": 27
    },
    {
      "epoch": 0.0009106930542789925,
      "eval_loss": 0.830035924911499,
      "eval_runtime": 1657.6879,
      "eval_samples_per_second": 30.122,
      "eval_steps_per_second": 3.765,
      "step": 27
    },
    {
      "epoch": 0.0010118811714211029,
      "grad_norm": 0.26121965050697327,
      "learning_rate": 4.415111107797445e-05,
      "loss": 0.7908,
      "step": 30
    },
    {
      "epoch": 0.001113069288563213,
      "grad_norm": 0.22511965036392212,
      "learning_rate": 4.2366459261474933e-05,
      "loss": 0.6791,
      "step": 33
    },
    {
      "epoch": 0.0012142574057053233,
      "grad_norm": 0.2881379723548889,
      "learning_rate": 4.039153688314145e-05,
      "loss": 0.8729,
      "step": 36
    },
    {
      "epoch": 0.0012142574057053233,
      "eval_loss": 0.8234006762504578,
      "eval_runtime": 1657.9322,
      "eval_samples_per_second": 30.118,
      "eval_steps_per_second": 3.765,
      "step": 36
    },
    {
      "epoch": 0.0013154455228474337,
      "grad_norm": 0.2760739028453827,
      "learning_rate": 3.824798160583012e-05,
      "loss": 0.7797,
      "step": 39
    },
    {
      "epoch": 0.0014166336399895438,
      "grad_norm": 0.2843494117259979,
      "learning_rate": 3.5959278669726935e-05,
      "loss": 0.7533,
      "step": 42
    },
    {
      "epoch": 0.0015178217571316542,
      "grad_norm": 0.23762422800064087,
      "learning_rate": 3.355050358314172e-05,
      "loss": 0.7663,
      "step": 45
    },
    {
      "epoch": 0.0015178217571316542,
      "eval_loss": 0.8193370699882507,
      "eval_runtime": 1657.7176,
      "eval_samples_per_second": 30.122,
      "eval_steps_per_second": 3.765,
      "step": 45
    },
    {
      "epoch": 0.0016190098742737645,
      "grad_norm": 0.3037606179714203,
      "learning_rate": 3.104804738999169e-05,
      "loss": 0.7856,
      "step": 48
    },
    {
      "epoch": 0.0017201979914158747,
      "grad_norm": 0.2485446184873581,
      "learning_rate": 2.8479327524001636e-05,
      "loss": 0.7938,
      "step": 51
    },
    {
      "epoch": 0.001821386108557985,
      "grad_norm": 0.24273481965065002,
      "learning_rate": 2.587248741756253e-05,
      "loss": 0.7606,
      "step": 54
    },
    {
      "epoch": 0.001821386108557985,
      "eval_loss": 0.8170143365859985,
      "eval_runtime": 1656.2446,
      "eval_samples_per_second": 30.148,
      "eval_steps_per_second": 3.769,
      "step": 54
    },
    {
      "epoch": 0.0019225742257000954,
      "grad_norm": 0.2631082534790039,
      "learning_rate": 2.3256088156396868e-05,
      "loss": 0.7554,
      "step": 57
    },
    {
      "epoch": 0.0020237623428422057,
      "grad_norm": 0.2768804430961609,
      "learning_rate": 2.0658795558326743e-05,
      "loss": 0.8178,
      "step": 60
    },
    {
      "epoch": 0.002124950459984316,
      "grad_norm": 0.23129825294017792,
      "learning_rate": 1.8109066104575023e-05,
      "loss": 0.8145,
      "step": 63
    },
    {
      "epoch": 0.002124950459984316,
      "eval_loss": 0.8158968091011047,
      "eval_runtime": 1657.0323,
      "eval_samples_per_second": 30.134,
      "eval_steps_per_second": 3.767,
      "step": 63
    },
    {
      "epoch": 0.002226138577126426,
      "grad_norm": 0.28922489285469055,
      "learning_rate": 1.56348351646022e-05,
      "loss": 0.7805,
      "step": 66
    },
    {
      "epoch": 0.0023273266942685365,
      "grad_norm": 0.30259978771209717,
      "learning_rate": 1.3263210930352737e-05,
      "loss": 0.8183,
      "step": 69
    },
    {
      "epoch": 0.0024285148114106467,
      "grad_norm": 0.2592143714427948,
      "learning_rate": 1.1020177413231334e-05,
      "loss": 0.8365,
      "step": 72
    },
    {
      "epoch": 0.0024285148114106467,
      "eval_loss": 0.8151922225952148,
      "eval_runtime": 1656.7975,
      "eval_samples_per_second": 30.138,
      "eval_steps_per_second": 3.768,
      "step": 72
    },
    {
      "epoch": 0.002529702928552757,
      "grad_norm": 0.21596688032150269,
      "learning_rate": 8.930309757836517e-06,
      "loss": 0.785,
      "step": 75
    },
    {
      "epoch": 0.0026308910456948674,
      "grad_norm": 0.2620626389980316,
      "learning_rate": 7.016504991533726e-06,
      "loss": 0.8722,
      "step": 78
    },
    {
      "epoch": 0.0027320791628369775,
      "grad_norm": 0.29990634322166443,
      "learning_rate": 5.299731159831953e-06,
      "loss": 0.8015,
      "step": 81
    },
    {
      "epoch": 0.0027320791628369775,
      "eval_loss": 0.8147459030151367,
      "eval_runtime": 1656.7133,
      "eval_samples_per_second": 30.14,
      "eval_steps_per_second": 3.768,
      "step": 81
    },
    {
      "epoch": 0.0028332672799790876,
      "grad_norm": 0.2881883382797241,
      "learning_rate": 3.798797596089351e-06,
      "loss": 0.7495,
      "step": 84
    },
    {
      "epoch": 0.002934455397121198,
      "grad_norm": 0.2536965012550354,
      "learning_rate": 2.5301488425208296e-06,
      "loss": 0.8133,
      "step": 87
    },
    {
      "epoch": 0.0030356435142633083,
      "grad_norm": 0.28757724165916443,
      "learning_rate": 1.5076844803522922e-06,
      "loss": 0.7783,
      "step": 90
    },
    {
      "epoch": 0.0030356435142633083,
      "eval_loss": 0.8145592212677002,
      "eval_runtime": 1657.575,
      "eval_samples_per_second": 30.124,
      "eval_steps_per_second": 3.766,
      "step": 90
    },
    {
      "epoch": 0.0031368316314054185,
      "grad_norm": 0.24967505037784576,
      "learning_rate": 7.426068431000882e-07,
      "loss": 0.7542,
      "step": 93
    },
    {
      "epoch": 0.003238019748547529,
      "grad_norm": 0.25066524744033813,
      "learning_rate": 2.4329828146074095e-07,
      "loss": 0.8054,
      "step": 96
    },
    {
      "epoch": 0.003339207865689639,
      "grad_norm": 0.2577332854270935,
      "learning_rate": 1.522932452260595e-08,
      "loss": 0.7455,
      "step": 99
    },
    {
      "epoch": 0.003339207865689639,
      "eval_loss": 0.8145051598548889,
      "eval_runtime": 1658.0359,
      "eval_samples_per_second": 30.116,
      "eval_steps_per_second": 3.765,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.0827944549023744e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}