|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.820761762509335, |
|
"eval_steps": 1602, |
|
"global_step": 16020, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.982076176250933, |
|
"grad_norm": 1.8657052516937256, |
|
"learning_rate": 9.001248439450687e-06, |
|
"loss": 1.7286, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 5.982076176250933, |
|
"eval_accuracy": 0.06538245219347581, |
|
"eval_loss": 3.015066623687744, |
|
"eval_runtime": 37.6228, |
|
"eval_samples_per_second": 189.034, |
|
"eval_steps_per_second": 9.462, |
|
"step": 1602 |
|
}, |
|
{ |
|
"epoch": 11.964152352501866, |
|
"grad_norm": 4.766932964324951, |
|
"learning_rate": 8.001248439450687e-06, |
|
"loss": 1.6207, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 11.964152352501866, |
|
"eval_accuracy": 0.06650731158605175, |
|
"eval_loss": 3.237614393234253, |
|
"eval_runtime": 37.2902, |
|
"eval_samples_per_second": 190.72, |
|
"eval_steps_per_second": 9.547, |
|
"step": 3204 |
|
}, |
|
{ |
|
"epoch": 17.9462285287528, |
|
"grad_norm": 3.993530035018921, |
|
"learning_rate": 7.00187265917603e-06, |
|
"loss": 1.5399, |
|
"step": 4806 |
|
}, |
|
{ |
|
"epoch": 17.9462285287528, |
|
"eval_accuracy": 0.06847581552305962, |
|
"eval_loss": 3.2385711669921875, |
|
"eval_runtime": 38.1712, |
|
"eval_samples_per_second": 186.319, |
|
"eval_steps_per_second": 9.326, |
|
"step": 4806 |
|
}, |
|
{ |
|
"epoch": 23.928304705003733, |
|
"grad_norm": 1.5667791366577148, |
|
"learning_rate": 6.002496878901374e-06, |
|
"loss": 1.4981, |
|
"step": 6408 |
|
}, |
|
{ |
|
"epoch": 23.928304705003733, |
|
"eval_accuracy": 0.0673158042744657, |
|
"eval_loss": 3.354517936706543, |
|
"eval_runtime": 37.7648, |
|
"eval_samples_per_second": 188.324, |
|
"eval_steps_per_second": 9.427, |
|
"step": 6408 |
|
}, |
|
{ |
|
"epoch": 29.910380881254667, |
|
"grad_norm": 2.5913686752319336, |
|
"learning_rate": 5.003121098626717e-06, |
|
"loss": 1.4774, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 29.910380881254667, |
|
"eval_accuracy": 0.06766029246344207, |
|
"eval_loss": 3.340369939804077, |
|
"eval_runtime": 37.3411, |
|
"eval_samples_per_second": 190.461, |
|
"eval_steps_per_second": 9.534, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 35.8924570575056, |
|
"grad_norm": 1.792555332183838, |
|
"learning_rate": 4.003121098626717e-06, |
|
"loss": 1.4648, |
|
"step": 9612 |
|
}, |
|
{ |
|
"epoch": 35.8924570575056, |
|
"eval_accuracy": 0.06699943757030372, |
|
"eval_loss": 3.4236271381378174, |
|
"eval_runtime": 37.2235, |
|
"eval_samples_per_second": 191.062, |
|
"eval_steps_per_second": 9.564, |
|
"step": 9612 |
|
}, |
|
{ |
|
"epoch": 41.87453323375654, |
|
"grad_norm": 1.6825398206710815, |
|
"learning_rate": 3.003121098626717e-06, |
|
"loss": 1.4549, |
|
"step": 11214 |
|
}, |
|
{ |
|
"epoch": 41.87453323375654, |
|
"eval_accuracy": 0.06644705126144947, |
|
"eval_loss": 3.4688560962677, |
|
"eval_runtime": 37.3243, |
|
"eval_samples_per_second": 190.546, |
|
"eval_steps_per_second": 9.538, |
|
"step": 11214 |
|
}, |
|
{ |
|
"epoch": 47.856609410007465, |
|
"grad_norm": 1.0186405181884766, |
|
"learning_rate": 2.00374531835206e-06, |
|
"loss": 1.4528, |
|
"step": 12816 |
|
}, |
|
{ |
|
"epoch": 47.856609410007465, |
|
"eval_accuracy": 0.06592730596175478, |
|
"eval_loss": 3.520540237426758, |
|
"eval_runtime": 37.6061, |
|
"eval_samples_per_second": 189.118, |
|
"eval_steps_per_second": 9.467, |
|
"step": 12816 |
|
}, |
|
{ |
|
"epoch": 53.8386855862584, |
|
"grad_norm": 1.1254180669784546, |
|
"learning_rate": 1.0043695380774035e-06, |
|
"loss": 1.4538, |
|
"step": 14418 |
|
}, |
|
{ |
|
"epoch": 53.8386855862584, |
|
"eval_accuracy": 0.06553868266466692, |
|
"eval_loss": 3.4702768325805664, |
|
"eval_runtime": 37.4351, |
|
"eval_samples_per_second": 189.982, |
|
"eval_steps_per_second": 9.51, |
|
"step": 14418 |
|
}, |
|
{ |
|
"epoch": 59.820761762509335, |
|
"grad_norm": 0.6253169775009155, |
|
"learning_rate": 4.993757802746567e-09, |
|
"loss": 1.4519, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 59.820761762509335, |
|
"eval_accuracy": 0.06511529808773903, |
|
"eval_loss": 3.4956905841827393, |
|
"eval_runtime": 38.202, |
|
"eval_samples_per_second": 186.168, |
|
"eval_steps_per_second": 9.319, |
|
"step": 16020 |
|
}, |
|
{ |
|
"epoch": 59.820761762509335, |
|
"step": 16020, |
|
"total_flos": 1.9001138663630822e+18, |
|
"train_loss": 1.5142938697233925, |
|
"train_runtime": 42053.4844, |
|
"train_samples_per_second": 76.397, |
|
"train_steps_per_second": 0.381 |
|
} |
|
], |
|
"logging_steps": 1602, |
|
"max_steps": 16020, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9001138663630822e+18, |
|
"train_batch_size": 20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|