{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 164,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06097560975609756,
      "grad_norm": 2.8818812370300293,
      "learning_rate": 2.2778456926584984e-06,
      "loss": 0.7153,
      "step": 10
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 1.8926554918289185,
      "learning_rate": 2.129933634693661e-06,
      "loss": 0.67,
      "step": 20
    },
    {
      "epoch": 0.18292682926829268,
      "grad_norm": 2.1769344806671143,
      "learning_rate": 1.9820215767288233e-06,
      "loss": 0.638,
      "step": 30
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 1.9136947393417358,
      "learning_rate": 1.834109518763986e-06,
      "loss": 0.6009,
      "step": 40
    },
    {
      "epoch": 0.3048780487804878,
      "grad_norm": 1.2495990991592407,
      "learning_rate": 1.6861974607991482e-06,
      "loss": 0.5909,
      "step": 50
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 1.0768756866455078,
      "learning_rate": 1.5382854028343109e-06,
      "loss": 0.5695,
      "step": 60
    },
    {
      "epoch": 0.4268292682926829,
      "grad_norm": 1.384157657623291,
      "learning_rate": 1.3903733448694731e-06,
      "loss": 0.5484,
      "step": 70
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 1.2738723754882812,
      "learning_rate": 1.2424612869046356e-06,
      "loss": 0.5688,
      "step": 80
    },
    {
      "epoch": 0.5487804878048781,
      "grad_norm": 1.511283278465271,
      "learning_rate": 1.094549228939798e-06,
      "loss": 0.4933,
      "step": 90
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 1.630743384361267,
      "learning_rate": 9.466371709749605e-07,
      "loss": 0.5173,
      "step": 100
    },
    {
      "epoch": 0.6707317073170732,
      "grad_norm": 1.6964328289031982,
      "learning_rate": 7.987251130101229e-07,
      "loss": 0.5205,
      "step": 110
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 1.619503140449524,
      "learning_rate": 6.508130550452854e-07,
      "loss": 0.5227,
      "step": 120
    },
    {
      "epoch": 0.7926829268292683,
      "grad_norm": 0.9739822149276733,
      "learning_rate": 5.029009970804477e-07,
      "loss": 0.4964,
      "step": 130
    },
    {
      "epoch": 0.8536585365853658,
      "grad_norm": 1.1013520956039429,
      "learning_rate": 3.5498893911561014e-07,
      "loss": 0.4978,
      "step": 140
    },
    {
      "epoch": 0.9146341463414634,
      "grad_norm": 1.492324948310852,
      "learning_rate": 2.0707688115077262e-07,
      "loss": 0.5045,
      "step": 150
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 1.4458919763565063,
      "learning_rate": 5.916482318593503e-08,
      "loss": 0.4941,
      "step": 160
    }
  ],
  "logging_steps": 10,
  "max_steps": 164,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 692539560173568.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 2.425757750623336e-06,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 32,
    "seed": 7
  }
}