{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.012227181023415051,
  "eval_steps": 10,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00024454362046830104,
      "eval_loss": 7.183546543121338,
      "eval_runtime": 187.0794,
      "eval_samples_per_second": 9.205,
      "eval_steps_per_second": 4.602,
      "step": 1
    },
    {
      "epoch": 0.001222718102341505,
      "grad_norm": 17.202749252319336,
      "learning_rate": 5e-05,
      "loss": 7.3138,
      "step": 5
    },
    {
      "epoch": 0.00244543620468301,
      "grad_norm": 14.147286415100098,
      "learning_rate": 0.0001,
      "loss": 5.2795,
      "step": 10
    },
    {
      "epoch": 0.00244543620468301,
      "eval_loss": 3.5840988159179688,
      "eval_runtime": 190.3052,
      "eval_samples_per_second": 9.049,
      "eval_steps_per_second": 4.524,
      "step": 10
    },
    {
      "epoch": 0.0036681543070245155,
      "grad_norm": 16.83527946472168,
      "learning_rate": 9.619397662556435e-05,
      "loss": 2.4507,
      "step": 15
    },
    {
      "epoch": 0.00489087240936602,
      "grad_norm": 5.885318756103516,
      "learning_rate": 8.535533905932738e-05,
      "loss": 0.5604,
      "step": 20
    },
    {
      "epoch": 0.00489087240936602,
      "eval_loss": 0.14171117544174194,
      "eval_runtime": 190.6068,
      "eval_samples_per_second": 9.034,
      "eval_steps_per_second": 4.517,
      "step": 20
    },
    {
      "epoch": 0.006113590511707526,
      "grad_norm": 3.527186632156372,
      "learning_rate": 6.91341716182545e-05,
      "loss": 0.1274,
      "step": 25
    },
    {
      "epoch": 0.007336308614049031,
      "grad_norm": 1.249354362487793,
      "learning_rate": 5e-05,
      "loss": 0.0513,
      "step": 30
    },
    {
      "epoch": 0.007336308614049031,
      "eval_loss": 0.04599830508232117,
      "eval_runtime": 190.4146,
      "eval_samples_per_second": 9.043,
      "eval_steps_per_second": 4.522,
      "step": 30
    },
    {
      "epoch": 0.008559026716390536,
      "grad_norm": 17.15851402282715,
      "learning_rate": 3.086582838174551e-05,
      "loss": 0.0512,
      "step": 35
    },
    {
      "epoch": 0.00978174481873204,
      "grad_norm": 0.4459174573421478,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 0.0019,
      "step": 40
    },
    {
      "epoch": 0.00978174481873204,
      "eval_loss": 0.019245758652687073,
      "eval_runtime": 190.3106,
      "eval_samples_per_second": 9.048,
      "eval_steps_per_second": 4.524,
      "step": 40
    },
    {
      "epoch": 0.011004462921073547,
      "grad_norm": 8.879000663757324,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 0.0188,
      "step": 45
    },
    {
      "epoch": 0.012227181023415051,
      "grad_norm": 0.0182019229978323,
      "learning_rate": 0.0,
      "loss": 0.0188,
      "step": 50
    },
    {
      "epoch": 0.012227181023415051,
      "eval_loss": 0.013666636310517788,
      "eval_runtime": 190.2885,
      "eval_samples_per_second": 9.049,
      "eval_steps_per_second": 4.525,
      "step": 50
    }
  ],
  "logging_steps": 5,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9555457081344000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}