{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.23255813953488372,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002325581395348837,
      "eval_loss": 5.767578601837158,
      "eval_runtime": 10.4189,
      "eval_samples_per_second": 69.489,
      "eval_steps_per_second": 8.734,
      "step": 1
    },
    {
      "epoch": 0.0069767441860465115,
      "grad_norm": 0.7177151441574097,
      "learning_rate": 3e-05,
      "loss": 5.7725,
      "step": 3
    },
    {
      "epoch": 0.013953488372093023,
      "grad_norm": 0.6705913543701172,
      "learning_rate": 6e-05,
      "loss": 5.6508,
      "step": 6
    },
    {
      "epoch": 0.020930232558139535,
      "grad_norm": 0.9152357578277588,
      "learning_rate": 9e-05,
      "loss": 5.7661,
      "step": 9
    },
    {
      "epoch": 0.020930232558139535,
      "eval_loss": 5.740058898925781,
      "eval_runtime": 10.4015,
      "eval_samples_per_second": 69.605,
      "eval_steps_per_second": 8.749,
      "step": 9
    },
    {
      "epoch": 0.027906976744186046,
      "grad_norm": 0.8683664798736572,
      "learning_rate": 9.987820251299122e-05,
      "loss": 5.7731,
      "step": 12
    },
    {
      "epoch": 0.03488372093023256,
      "grad_norm": 1.1061254739761353,
      "learning_rate": 9.924038765061042e-05,
      "loss": 5.4242,
      "step": 15
    },
    {
      "epoch": 0.04186046511627907,
      "grad_norm": 0.99778813123703,
      "learning_rate": 9.806308479691595e-05,
      "loss": 5.2717,
      "step": 18
    },
    {
      "epoch": 0.04186046511627907,
      "eval_loss": 5.513608932495117,
      "eval_runtime": 10.4282,
      "eval_samples_per_second": 69.427,
      "eval_steps_per_second": 8.726,
      "step": 18
    },
    {
      "epoch": 0.04883720930232558,
      "grad_norm": 1.3651623725891113,
      "learning_rate": 9.635919272833938e-05,
      "loss": 5.5186,
      "step": 21
    },
    {
      "epoch": 0.05581395348837209,
      "grad_norm": 1.554046630859375,
      "learning_rate": 9.414737964294636e-05,
      "loss": 5.3002,
      "step": 24
    },
    {
      "epoch": 0.06279069767441861,
      "grad_norm": 1.4339934587478638,
      "learning_rate": 9.145187862775209e-05,
      "loss": 5.1694,
      "step": 27
    },
    {
      "epoch": 0.06279069767441861,
      "eval_loss": 5.061354637145996,
      "eval_runtime": 10.4152,
      "eval_samples_per_second": 69.514,
      "eval_steps_per_second": 8.737,
      "step": 27
    },
    {
      "epoch": 0.06976744186046512,
      "grad_norm": 1.417418360710144,
      "learning_rate": 8.83022221559489e-05,
      "loss": 4.9417,
      "step": 30
    },
    {
      "epoch": 0.07674418604651163,
      "grad_norm": 1.4718533754348755,
      "learning_rate": 8.473291852294987e-05,
      "loss": 4.5364,
      "step": 33
    },
    {
      "epoch": 0.08372093023255814,
      "grad_norm": 1.4996187686920166,
      "learning_rate": 8.07830737662829e-05,
      "loss": 4.6917,
      "step": 36
    },
    {
      "epoch": 0.08372093023255814,
      "eval_loss": 4.580844402313232,
      "eval_runtime": 10.4483,
      "eval_samples_per_second": 69.294,
      "eval_steps_per_second": 8.71,
      "step": 36
    },
    {
      "epoch": 0.09069767441860466,
      "grad_norm": 1.4007647037506104,
      "learning_rate": 7.649596321166024e-05,
      "loss": 4.4604,
      "step": 39
    },
    {
      "epoch": 0.09767441860465116,
      "grad_norm": 1.404478907585144,
      "learning_rate": 7.191855733945387e-05,
      "loss": 4.2265,
      "step": 42
    },
    {
      "epoch": 0.10465116279069768,
      "grad_norm": 1.4139697551727295,
      "learning_rate": 6.710100716628344e-05,
      "loss": 4.1217,
      "step": 45
    },
    {
      "epoch": 0.10465116279069768,
      "eval_loss": 4.179461479187012,
      "eval_runtime": 10.381,
      "eval_samples_per_second": 69.743,
      "eval_steps_per_second": 8.766,
      "step": 45
    },
    {
      "epoch": 0.11162790697674418,
      "grad_norm": 1.4293689727783203,
      "learning_rate": 6.209609477998338e-05,
      "loss": 3.9897,
      "step": 48
    },
    {
      "epoch": 0.1186046511627907,
      "grad_norm": 1.2574753761291504,
      "learning_rate": 5.695865504800327e-05,
      "loss": 4.0715,
      "step": 51
    },
    {
      "epoch": 0.12558139534883722,
      "grad_norm": 1.068290114402771,
      "learning_rate": 5.174497483512506e-05,
      "loss": 3.8083,
      "step": 54
    },
    {
      "epoch": 0.12558139534883722,
      "eval_loss": 3.9013290405273438,
      "eval_runtime": 10.3754,
      "eval_samples_per_second": 69.78,
      "eval_steps_per_second": 8.771,
      "step": 54
    },
    {
      "epoch": 0.1325581395348837,
      "grad_norm": 1.0287175178527832,
      "learning_rate": 4.6512176312793736e-05,
      "loss": 3.9619,
      "step": 57
    },
    {
      "epoch": 0.13953488372093023,
      "grad_norm": 1.1125946044921875,
      "learning_rate": 4.131759111665349e-05,
      "loss": 3.6843,
      "step": 60
    },
    {
      "epoch": 0.14651162790697675,
      "grad_norm": 0.8303821086883545,
      "learning_rate": 3.6218132209150045e-05,
      "loss": 3.6188,
      "step": 63
    },
    {
      "epoch": 0.14651162790697675,
      "eval_loss": 3.723705768585205,
      "eval_runtime": 10.3865,
      "eval_samples_per_second": 69.706,
      "eval_steps_per_second": 8.761,
      "step": 63
    },
    {
      "epoch": 0.15348837209302327,
      "grad_norm": 1.2538200616836548,
      "learning_rate": 3.12696703292044e-05,
      "loss": 3.5128,
      "step": 66
    },
    {
      "epoch": 0.16046511627906976,
      "grad_norm": 0.9986847639083862,
      "learning_rate": 2.6526421860705473e-05,
      "loss": 3.5111,
      "step": 69
    },
    {
      "epoch": 0.16744186046511628,
      "grad_norm": 1.1189088821411133,
      "learning_rate": 2.2040354826462668e-05,
      "loss": 3.654,
      "step": 72
    },
    {
      "epoch": 0.16744186046511628,
      "eval_loss": 3.6171345710754395,
      "eval_runtime": 10.3906,
      "eval_samples_per_second": 69.678,
      "eval_steps_per_second": 8.758,
      "step": 72
    },
    {
      "epoch": 0.1744186046511628,
      "grad_norm": 0.94745934009552,
      "learning_rate": 1.7860619515673033e-05,
      "loss": 3.7317,
      "step": 75
    },
    {
      "epoch": 0.1813953488372093,
      "grad_norm": 0.8033557534217834,
      "learning_rate": 1.4033009983067452e-05,
      "loss": 3.7478,
      "step": 78
    },
    {
      "epoch": 0.1883720930232558,
      "grad_norm": 0.942162036895752,
      "learning_rate": 1.0599462319663905e-05,
      "loss": 3.4858,
      "step": 81
    },
    {
      "epoch": 0.1883720930232558,
      "eval_loss": 3.5632054805755615,
      "eval_runtime": 10.3798,
      "eval_samples_per_second": 69.751,
      "eval_steps_per_second": 8.767,
      "step": 81
    },
    {
      "epoch": 0.19534883720930232,
      "grad_norm": 0.8051215410232544,
      "learning_rate": 7.597595192178702e-06,
      "loss": 3.3875,
      "step": 84
    },
    {
      "epoch": 0.20232558139534884,
      "grad_norm": 1.0949082374572754,
      "learning_rate": 5.060297685041659e-06,
      "loss": 3.6471,
      "step": 87
    },
    {
      "epoch": 0.20930232558139536,
      "grad_norm": 0.9996029138565063,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 3.5462,
      "step": 90
    },
    {
      "epoch": 0.20930232558139536,
      "eval_loss": 3.5418734550476074,
      "eval_runtime": 10.3865,
      "eval_samples_per_second": 69.706,
      "eval_steps_per_second": 8.761,
      "step": 90
    },
    {
      "epoch": 0.21627906976744185,
      "grad_norm": 1.184890627861023,
      "learning_rate": 1.4852136862001764e-06,
      "loss": 3.6468,
      "step": 93
    },
    {
      "epoch": 0.22325581395348837,
      "grad_norm": 1.0085941553115845,
      "learning_rate": 4.865965629214819e-07,
      "loss": 3.5317,
      "step": 96
    },
    {
      "epoch": 0.2302325581395349,
      "grad_norm": 0.8848790526390076,
      "learning_rate": 3.04586490452119e-08,
      "loss": 3.4723,
      "step": 99
    },
    {
      "epoch": 0.2302325581395349,
      "eval_loss": 3.538015842437744,
      "eval_runtime": 10.3899,
      "eval_samples_per_second": 69.683,
      "eval_steps_per_second": 8.759,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 6271342215168000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}