{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.032520325203252,
  "eval_steps": 500,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1,
      "grad_norm": 0.7537718524378184,
      "learning_rate": 4.998825837977733e-05,
      "loss": 1.0335,
      "step": 25
    },
    {
      "epoch": 0.2,
      "grad_norm": 0.5115893351462029,
      "learning_rate": 4.9951068336359185e-05,
      "loss": 0.9543,
      "step": 50
    },
    {
      "epoch": 0.3,
      "grad_norm": 0.3655365544393326,
      "learning_rate": 4.9888447388643216e-05,
      "loss": 0.889,
      "step": 75
    },
    {
      "epoch": 0.41,
      "grad_norm": 0.3802482724658219,
      "learning_rate": 4.980045936184552e-05,
      "loss": 0.8824,
      "step": 100
    },
    {
      "epoch": 0.51,
      "grad_norm": 0.46128857579583404,
      "learning_rate": 4.968719393609757e-05,
      "loss": 0.8812,
      "step": 125
    },
    {
      "epoch": 0.61,
      "grad_norm": 0.4675840689300933,
      "learning_rate": 4.954876655504144e-05,
      "loss": 0.8626,
      "step": 150
    },
    {
      "epoch": 0.71,
      "grad_norm": 0.5174033092078555,
      "learning_rate": 4.938531830816607e-05,
      "loss": 0.8542,
      "step": 175
    },
    {
      "epoch": 0.81,
      "grad_norm": 0.47966453174679635,
      "learning_rate": 4.919701578700444e-05,
      "loss": 0.8615,
      "step": 200
    },
    {
      "epoch": 0.91,
      "grad_norm": 0.5800019356792034,
      "learning_rate": 4.898405091533834e-05,
      "loss": 0.8198,
      "step": 225
    },
    {
      "epoch": 1.02,
      "grad_norm": 0.5068384935929343,
      "learning_rate": 4.874664075358366e-05,
      "loss": 0.835,
      "step": 250
    },
    {
      "epoch": 1.12,
      "grad_norm": 0.5665554500957887,
      "learning_rate": 4.84850272775557e-05,
      "loss": 0.833,
      "step": 275
    },
    {
      "epoch": 1.22,
      "grad_norm": 0.6225574393610873,
      "learning_rate": 4.8199477131839854e-05,
      "loss": 0.8362,
      "step": 300
    },
    {
      "epoch": 1.32,
      "grad_norm": 0.5883987854013639,
      "learning_rate": 4.789028135801918e-05,
      "loss": 0.8315,
      "step": 325
    },
    {
      "epoch": 1.42,
      "grad_norm": 0.6212622090526995,
      "learning_rate": 4.7557755098035814e-05,
      "loss": 0.8082,
      "step": 350
    },
    {
      "epoch": 1.52,
      "grad_norm": 0.6254380356435723,
      "learning_rate": 4.720223727298845e-05,
      "loss": 0.8112,
      "step": 375
    },
    {
      "epoch": 1.63,
      "grad_norm": 0.7114667768707209,
      "learning_rate": 4.682409023769342e-05,
      "loss": 0.8141,
      "step": 400
    },
    {
      "epoch": 1.73,
      "grad_norm": 0.7156140969579615,
      "learning_rate": 4.6423699411361474e-05,
      "loss": 0.8214,
      "step": 425
    },
    {
      "epoch": 1.83,
      "grad_norm": 0.6560300477797654,
      "learning_rate": 4.600147288476647e-05,
      "loss": 0.819,
      "step": 450
    },
    {
      "epoch": 1.93,
      "grad_norm": 0.6220749749772762,
      "learning_rate": 4.5557841004306625e-05,
      "loss": 0.8177,
      "step": 475
    },
    {
      "epoch": 2.03,
      "grad_norm": 0.7459915153227248,
      "learning_rate": 4.509325593338203e-05,
      "loss": 0.8207,
      "step": 500
    }
  ],
  "logging_steps": 25,
  "max_steps": 2460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 152390335463424.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}