{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5,
  "eval_steps": 3,
  "global_step": 57,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008771929824561403,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 2.0802,
      "step": 1
    },
    {
      "epoch": 0.02631578947368421,
      "grad_norm": 0.0,
      "learning_rate": 0.0,
      "loss": 2.0494,
      "step": 3
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 2.4723047601512564,
      "learning_rate": 6.000000000000001e-07,
      "loss": 2.0771,
      "step": 6
    },
    {
      "epoch": 0.07894736842105263,
      "grad_norm": 2.106898891910819,
      "learning_rate": 1.8e-06,
      "loss": 2.0245,
      "step": 9
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 0.8386924455230856,
      "learning_rate": 2.9432692307692307e-06,
      "loss": 2.0568,
      "step": 12
    },
    {
      "epoch": 0.13157894736842105,
      "grad_norm": 1.1653346026055935,
      "learning_rate": 2.7730769230769233e-06,
      "loss": 2.0326,
      "step": 15
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 1.5574035133942843,
      "learning_rate": 2.6028846153846155e-06,
      "loss": 2.0396,
      "step": 18
    },
    {
      "epoch": 0.18421052631578946,
      "grad_norm": 0.9621595083981531,
      "learning_rate": 2.4326923076923077e-06,
      "loss": 2.0336,
      "step": 21
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 0.9955527535333841,
      "learning_rate": 2.2625e-06,
      "loss": 1.9571,
      "step": 24
    },
    {
      "epoch": 0.23684210526315788,
      "grad_norm": 0.7832834940985813,
      "learning_rate": 2.092307692307692e-06,
      "loss": 1.9792,
      "step": 27
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.7516778470264893,
      "learning_rate": 1.9221153846153848e-06,
      "loss": 2.015,
      "step": 30
    },
    {
      "epoch": 0.2894736842105263,
      "grad_norm": 1.4307615046721256,
      "learning_rate": 1.7519230769230768e-06,
      "loss": 1.9845,
      "step": 33
    },
    {
      "epoch": 0.3157894736842105,
      "grad_norm": 0.7110172377205767,
      "learning_rate": 1.581730769230769e-06,
      "loss": 1.974,
      "step": 36
    },
    {
      "epoch": 0.34210526315789475,
      "grad_norm": 0.9472807779995442,
      "learning_rate": 1.4115384615384616e-06,
      "loss": 1.9848,
      "step": 39
    },
    {
      "epoch": 0.3684210526315789,
      "grad_norm": 0.6928271721519345,
      "learning_rate": 1.2413461538461538e-06,
      "loss": 1.9453,
      "step": 42
    },
    {
      "epoch": 0.39473684210526316,
      "grad_norm": 0.9136097540650397,
      "learning_rate": 1.071153846153846e-06,
      "loss": 1.9987,
      "step": 45
    },
    {
      "epoch": 0.42105263157894735,
      "grad_norm": 0.6671789202988747,
      "learning_rate": 9.009615384615385e-07,
      "loss": 2.0054,
      "step": 48
    },
    {
      "epoch": 0.4473684210526316,
      "grad_norm": 1.2888605208856772,
      "learning_rate": 7.307692307692307e-07,
      "loss": 1.9706,
      "step": 51
    },
    {
      "epoch": 0.47368421052631576,
      "grad_norm": 0.9597150120115726,
      "learning_rate": 6.740384615384617e-07,
      "loss": 1.9874,
      "step": 54
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.7428542672176522,
      "learning_rate": 5.038461538461539e-07,
      "loss": 2.0058,
      "step": 57
    },
    {
      "epoch": 0.5,
      "step": 57,
      "total_flos": 227154377834496.0,
      "train_loss": 2.006924344782244,
      "train_runtime": 17818.5287,
      "train_samples_per_second": 0.409,
      "train_steps_per_second": 0.003
    }
  ],
  "logging_steps": 3,
  "max_steps": 57,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 12,
  "total_flos": 227154377834496.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}