|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.026055237102657634, |
|
"eval_steps": 4, |
|
"global_step": 25, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010422094841063053, |
|
"grad_norm": 3.42587947845459, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 6.4631, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0010422094841063053, |
|
"eval_loss": 6.671364784240723, |
|
"eval_runtime": 9.44, |
|
"eval_samples_per_second": 42.903, |
|
"eval_steps_per_second": 10.805, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0020844189682126106, |
|
"grad_norm": 3.787745714187622, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 6.619, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.003126628452318916, |
|
"grad_norm": 3.6949379444122314, |
|
"learning_rate": 3e-06, |
|
"loss": 6.5476, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.004168837936425221, |
|
"grad_norm": 4.343378067016602, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 6.6851, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.004168837936425221, |
|
"eval_loss": 6.672913074493408, |
|
"eval_runtime": 7.8311, |
|
"eval_samples_per_second": 51.717, |
|
"eval_steps_per_second": 13.025, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.005211047420531527, |
|
"grad_norm": 4.449336528778076, |
|
"learning_rate": 5e-06, |
|
"loss": 6.7603, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.006253256904637832, |
|
"grad_norm": 4.981034755706787, |
|
"learning_rate": 6e-06, |
|
"loss": 6.7741, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.007295466388744137, |
|
"grad_norm": 4.62288236618042, |
|
"learning_rate": 7e-06, |
|
"loss": 7.3853, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.008337675872850442, |
|
"grad_norm": 4.797128677368164, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 7.1027, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.008337675872850442, |
|
"eval_loss": 6.669874668121338, |
|
"eval_runtime": 7.7933, |
|
"eval_samples_per_second": 51.968, |
|
"eval_steps_per_second": 13.088, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.009379885356956748, |
|
"grad_norm": 4.196491718292236, |
|
"learning_rate": 9e-06, |
|
"loss": 6.7359, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.010422094841063054, |
|
"grad_norm": 3.4589948654174805, |
|
"learning_rate": 1e-05, |
|
"loss": 6.5552, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.011464304325169358, |
|
"grad_norm": 3.497850179672241, |
|
"learning_rate": 9.890738003669029e-06, |
|
"loss": 6.8426, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.012506513809275664, |
|
"grad_norm": 3.9026501178741455, |
|
"learning_rate": 9.567727288213005e-06, |
|
"loss": 6.9201, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.012506513809275664, |
|
"eval_loss": 6.656294822692871, |
|
"eval_runtime": 7.8499, |
|
"eval_samples_per_second": 51.593, |
|
"eval_steps_per_second": 12.994, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01354872329338197, |
|
"grad_norm": 3.805992603302002, |
|
"learning_rate": 9.045084971874738e-06, |
|
"loss": 6.4677, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.014590932777488274, |
|
"grad_norm": 3.989011526107788, |
|
"learning_rate": 8.345653031794292e-06, |
|
"loss": 6.8528, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.015633142261594582, |
|
"grad_norm": 4.253493785858154, |
|
"learning_rate": 7.500000000000001e-06, |
|
"loss": 6.8437, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.016675351745700884, |
|
"grad_norm": 4.157368183135986, |
|
"learning_rate": 6.545084971874738e-06, |
|
"loss": 7.0895, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.016675351745700884, |
|
"eval_loss": 6.632657051086426, |
|
"eval_runtime": 7.8431, |
|
"eval_samples_per_second": 51.638, |
|
"eval_steps_per_second": 13.005, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01771756122980719, |
|
"grad_norm": 4.2801594734191895, |
|
"learning_rate": 5.522642316338268e-06, |
|
"loss": 6.8602, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.018759770713913496, |
|
"grad_norm": 3.379889726638794, |
|
"learning_rate": 4.477357683661734e-06, |
|
"loss": 6.3006, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.019801980198019802, |
|
"grad_norm": 4.244846820831299, |
|
"learning_rate": 3.4549150281252635e-06, |
|
"loss": 6.6127, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.020844189682126108, |
|
"grad_norm": 4.060556411743164, |
|
"learning_rate": 2.5000000000000015e-06, |
|
"loss": 6.3001, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.020844189682126108, |
|
"eval_loss": 6.617997646331787, |
|
"eval_runtime": 7.8463, |
|
"eval_samples_per_second": 51.617, |
|
"eval_steps_per_second": 13.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021886399166232414, |
|
"grad_norm": 4.195801258087158, |
|
"learning_rate": 1.6543469682057105e-06, |
|
"loss": 7.1121, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.022928608650338717, |
|
"grad_norm": 3.2973010540008545, |
|
"learning_rate": 9.549150281252633e-07, |
|
"loss": 5.6943, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.023970818134445022, |
|
"grad_norm": 3.863523244857788, |
|
"learning_rate": 4.322727117869951e-07, |
|
"loss": 6.3595, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02501302761855133, |
|
"grad_norm": 3.6141159534454346, |
|
"learning_rate": 1.0926199633097156e-07, |
|
"loss": 6.1388, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02501302761855133, |
|
"eval_loss": 6.613474369049072, |
|
"eval_runtime": 7.81, |
|
"eval_samples_per_second": 51.856, |
|
"eval_steps_per_second": 13.06, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.026055237102657634, |
|
"grad_norm": 4.716870307922363, |
|
"learning_rate": 0.0, |
|
"loss": 6.6627, |
|
"step": 25 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 25, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 347093965209600.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|