|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.002053036783575706,
  "eval_steps": 5,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 6.843455945252352e-05,
      "eval_loss": 3.538214683532715,
      "eval_runtime": 118.0308,
      "eval_samples_per_second": 52.13,
      "eval_steps_per_second": 26.069,
      "step": 1
    },
    {
      "epoch": 0.00020530367835757058,
      "grad_norm": 5.86816930770874,
      "learning_rate": 3e-05,
      "loss": 14.2945,
      "step": 3
    },
    {
      "epoch": 0.00034217279726261765,
      "eval_loss": 3.5032713413238525,
      "eval_runtime": 117.9917,
      "eval_samples_per_second": 52.148,
      "eval_steps_per_second": 26.078,
      "step": 5
    },
    {
      "epoch": 0.00041060735671514116,
      "grad_norm": 5.050374984741211,
      "learning_rate": 6e-05,
      "loss": 14.3552,
      "step": 6
    },
    {
      "epoch": 0.0006159110350727117,
      "grad_norm": 5.234264373779297,
      "learning_rate": 9e-05,
      "loss": 14.0755,
      "step": 9
    },
    {
      "epoch": 0.0006843455945252353,
      "eval_loss": 3.3391058444976807,
      "eval_runtime": 118.289,
      "eval_samples_per_second": 52.017,
      "eval_steps_per_second": 26.013,
      "step": 10
    },
    {
      "epoch": 0.0008212147134302823,
      "grad_norm": 4.1342692375183105,
      "learning_rate": 9.755282581475769e-05,
      "loss": 12.3301,
      "step": 12
    },
    {
      "epoch": 0.001026518391787853,
      "grad_norm": 4.67058801651001,
      "learning_rate": 8.535533905932738e-05,
      "loss": 13.0502,
      "step": 15
    },
    {
      "epoch": 0.001026518391787853,
      "eval_loss": 3.1648316383361816,
      "eval_runtime": 118.209,
      "eval_samples_per_second": 52.052,
      "eval_steps_per_second": 26.03,
      "step": 15
    },
    {
      "epoch": 0.0012318220701454234,
      "grad_norm": 6.681149482727051,
      "learning_rate": 6.545084971874738e-05,
      "loss": 14.0048,
      "step": 18
    },
    {
      "epoch": 0.0013686911890504706,
      "eval_loss": 3.0710721015930176,
      "eval_runtime": 118.1932,
      "eval_samples_per_second": 52.059,
      "eval_steps_per_second": 26.034,
      "step": 20
    },
    {
      "epoch": 0.001437125748502994,
      "grad_norm": 4.836862087249756,
      "learning_rate": 4.2178276747988446e-05,
      "loss": 12.4417,
      "step": 21
    },
    {
      "epoch": 0.0016424294268605646,
      "grad_norm": 6.216834545135498,
      "learning_rate": 2.061073738537635e-05,
      "loss": 11.7135,
      "step": 24
    },
    {
      "epoch": 0.001710863986313088,
      "eval_loss": 3.0287530422210693,
      "eval_runtime": 118.1814,
      "eval_samples_per_second": 52.064,
      "eval_steps_per_second": 26.036,
      "step": 25
    },
    {
      "epoch": 0.0018477331052181353,
      "grad_norm": 5.625243663787842,
      "learning_rate": 5.449673790581611e-06,
      "loss": 11.8553,
      "step": 27
    },
    {
      "epoch": 0.002053036783575706,
      "grad_norm": 4.978127479553223,
      "learning_rate": 0.0,
      "loss": 11.1414,
      "step": 30
    },
    {
      "epoch": 0.002053036783575706,
      "eval_loss": 3.01611328125,
      "eval_runtime": 118.2006,
      "eval_samples_per_second": 52.056,
      "eval_steps_per_second": 26.032,
      "step": 30
    }
  ],
  "logging_steps": 3,
  "max_steps": 30,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 455056482631680.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}
|
|