{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.006348668895754857,
  "eval_steps": 6,
  "global_step": 25,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002539467558301943,
      "grad_norm": 2.3970961570739746,
      "learning_rate": 1e-05,
      "loss": 66.6603,
      "step": 1
    },
    {
      "epoch": 0.0002539467558301943,
      "eval_loss": 11.107478141784668,
      "eval_runtime": 18.8123,
      "eval_samples_per_second": 264.454,
      "eval_steps_per_second": 66.127,
      "step": 1
    },
    {
      "epoch": 0.0005078935116603886,
      "grad_norm": 2.4752585887908936,
      "learning_rate": 2e-05,
      "loss": 66.6705,
      "step": 2
    },
    {
      "epoch": 0.0007618402674905828,
      "grad_norm": 2.5148186683654785,
      "learning_rate": 3e-05,
      "loss": 66.6682,
      "step": 3
    },
    {
      "epoch": 0.0010157870233207772,
      "grad_norm": 2.3080971240997314,
      "learning_rate": 4e-05,
      "loss": 66.5837,
      "step": 4
    },
    {
      "epoch": 0.0012697337791509713,
      "grad_norm": 2.354560613632202,
      "learning_rate": 5e-05,
      "loss": 66.6436,
      "step": 5
    },
    {
      "epoch": 0.0015236805349811656,
      "grad_norm": 2.5473434925079346,
      "learning_rate": 6e-05,
      "loss": 66.6068,
      "step": 6
    },
    {
      "epoch": 0.0015236805349811656,
      "eval_loss": 11.101515769958496,
      "eval_runtime": 18.1722,
      "eval_samples_per_second": 273.77,
      "eval_steps_per_second": 68.456,
      "step": 6
    },
    {
      "epoch": 0.00177762729081136,
      "grad_norm": 2.5556399822235107,
      "learning_rate": 7e-05,
      "loss": 66.6045,
      "step": 7
    },
    {
      "epoch": 0.0020315740466415543,
      "grad_norm": 2.6539461612701416,
      "learning_rate": 8e-05,
      "loss": 66.6228,
      "step": 8
    },
    {
      "epoch": 0.0022855208024717484,
      "grad_norm": 2.602025032043457,
      "learning_rate": 9e-05,
      "loss": 66.4995,
      "step": 9
    },
    {
      "epoch": 0.0025394675583019426,
      "grad_norm": 2.4404804706573486,
      "learning_rate": 0.0001,
      "loss": 66.5529,
      "step": 10
    },
    {
      "epoch": 0.002793414314132137,
      "grad_norm": 2.2321112155914307,
      "learning_rate": 9.98458666866564e-05,
      "loss": 66.4916,
      "step": 11
    },
    {
      "epoch": 0.0030473610699623312,
      "grad_norm": 2.796868085861206,
      "learning_rate": 9.938441702975689e-05,
      "loss": 66.4966,
      "step": 12
    },
    {
      "epoch": 0.0030473610699623312,
      "eval_loss": 11.079522132873535,
      "eval_runtime": 18.1003,
      "eval_samples_per_second": 274.858,
      "eval_steps_per_second": 68.728,
      "step": 12
    },
    {
      "epoch": 0.0033013078257925254,
      "grad_norm": 2.591143846511841,
      "learning_rate": 9.861849601988383e-05,
      "loss": 66.4451,
      "step": 13
    },
    {
      "epoch": 0.00355525458162272,
      "grad_norm": 2.323777675628662,
      "learning_rate": 9.755282581475769e-05,
      "loss": 66.4647,
      "step": 14
    },
    {
      "epoch": 0.003809201337452914,
      "grad_norm": 2.9256598949432373,
      "learning_rate": 9.619397662556435e-05,
      "loss": 66.3829,
      "step": 15
    },
    {
      "epoch": 0.004063148093283109,
      "grad_norm": 2.4250593185424805,
      "learning_rate": 9.45503262094184e-05,
      "loss": 66.4283,
      "step": 16
    },
    {
      "epoch": 0.004317094849113303,
      "grad_norm": 2.607290029525757,
      "learning_rate": 9.263200821770461e-05,
      "loss": 66.2636,
      "step": 17
    },
    {
      "epoch": 0.004571041604943497,
      "grad_norm": 2.8356802463531494,
      "learning_rate": 9.045084971874738e-05,
      "loss": 66.2697,
      "step": 18
    },
    {
      "epoch": 0.004571041604943497,
      "eval_loss": 11.051901817321777,
      "eval_runtime": 18.1634,
      "eval_samples_per_second": 273.902,
      "eval_steps_per_second": 68.489,
      "step": 18
    },
    {
      "epoch": 0.004824988360773691,
      "grad_norm": 2.4801783561706543,
      "learning_rate": 8.802029828000156e-05,
      "loss": 66.3638,
      "step": 19
    },
    {
      "epoch": 0.005078935116603885,
      "grad_norm": 2.5052285194396973,
      "learning_rate": 8.535533905932738e-05,
      "loss": 66.2984,
      "step": 20
    },
    {
      "epoch": 0.005332881872434079,
      "grad_norm": 2.5191738605499268,
      "learning_rate": 8.247240241650918e-05,
      "loss": 66.3049,
      "step": 21
    },
    {
      "epoch": 0.005586828628264274,
      "grad_norm": 2.384291887283325,
      "learning_rate": 7.938926261462366e-05,
      "loss": 66.2243,
      "step": 22
    },
    {
      "epoch": 0.005840775384094468,
      "grad_norm": 2.3815808296203613,
      "learning_rate": 7.612492823579745e-05,
      "loss": 66.3124,
      "step": 23
    },
    {
      "epoch": 0.0060947221399246625,
      "grad_norm": 2.601182222366333,
      "learning_rate": 7.269952498697734e-05,
      "loss": 66.2157,
      "step": 24
    },
    {
      "epoch": 0.0060947221399246625,
      "eval_loss": 11.026150703430176,
      "eval_runtime": 18.1547,
      "eval_samples_per_second": 274.034,
      "eval_steps_per_second": 68.522,
      "step": 24
    },
    {
      "epoch": 0.006348668895754857,
      "grad_norm": 2.959272861480713,
      "learning_rate": 6.91341716182545e-05,
      "loss": 66.0543,
      "step": 25
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 3572917862400.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}