{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 11080,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.18050541516245489,
      "grad_norm": 31.555688858032227,
      "learning_rate": 2.864620938628159e-05,
      "loss": 1.3005,
      "step": 500
    },
    {
      "epoch": 0.36101083032490977,
      "grad_norm": 20.2050838470459,
      "learning_rate": 2.729241877256318e-05,
      "loss": 0.9978,
      "step": 1000
    },
    {
      "epoch": 0.5415162454873647,
      "grad_norm": 25.661306381225586,
      "learning_rate": 2.5938628158844765e-05,
      "loss": 0.9259,
      "step": 1500
    },
    {
      "epoch": 0.7220216606498195,
      "grad_norm": 22.348859786987305,
      "learning_rate": 2.4584837545126353e-05,
      "loss": 0.8798,
      "step": 2000
    },
    {
      "epoch": 0.9025270758122743,
      "grad_norm": 32.197166442871094,
      "learning_rate": 2.3231046931407943e-05,
      "loss": 0.8471,
      "step": 2500
    },
    {
      "epoch": 1.0830324909747293,
      "grad_norm": 20.764020919799805,
      "learning_rate": 2.1877256317689534e-05,
      "loss": 0.771,
      "step": 3000
    },
    {
      "epoch": 1.263537906137184,
      "grad_norm": 15.258258819580078,
      "learning_rate": 2.0523465703971117e-05,
      "loss": 0.6937,
      "step": 3500
    },
    {
      "epoch": 1.444043321299639,
      "grad_norm": 24.816614151000977,
      "learning_rate": 1.9169675090252708e-05,
      "loss": 0.709,
      "step": 4000
    },
    {
      "epoch": 1.6245487364620939,
      "grad_norm": 38.912071228027344,
      "learning_rate": 1.7815884476534298e-05,
      "loss": 0.6831,
      "step": 4500
    },
    {
      "epoch": 1.8050541516245486,
      "grad_norm": 8.910807609558105,
      "learning_rate": 1.6462093862815885e-05,
      "loss": 0.6742,
      "step": 5000
    },
    {
      "epoch": 1.9855595667870036,
      "grad_norm": 11.664189338684082,
      "learning_rate": 1.5108303249097474e-05,
      "loss": 0.69,
      "step": 5500
    },
    {
      "epoch": 2.1660649819494586,
      "grad_norm": 10.968308448791504,
      "learning_rate": 1.3754512635379063e-05,
      "loss": 0.5436,
      "step": 6000
    },
    {
      "epoch": 2.3465703971119134,
      "grad_norm": 11.711438179016113,
      "learning_rate": 1.240072202166065e-05,
      "loss": 0.5357,
      "step": 6500
    },
    {
      "epoch": 2.527075812274368,
      "grad_norm": 13.477335929870605,
      "learning_rate": 1.1046931407942239e-05,
      "loss": 0.5359,
      "step": 7000
    },
    {
      "epoch": 2.707581227436823,
      "grad_norm": 10.649256706237793,
      "learning_rate": 9.693140794223826e-06,
      "loss": 0.5394,
      "step": 7500
    },
    {
      "epoch": 2.888086642599278,
      "grad_norm": 10.525208473205566,
      "learning_rate": 8.339350180505416e-06,
      "loss": 0.5254,
      "step": 8000
    },
    {
      "epoch": 3.068592057761733,
      "grad_norm": 19.402320861816406,
      "learning_rate": 6.985559566787004e-06,
      "loss": 0.4775,
      "step": 8500
    },
    {
      "epoch": 3.2490974729241877,
      "grad_norm": 41.23615646362305,
      "learning_rate": 5.631768953068592e-06,
      "loss": 0.4003,
      "step": 9000
    },
    {
      "epoch": 3.4296028880866425,
      "grad_norm": 21.56231689453125,
      "learning_rate": 4.277978339350181e-06,
      "loss": 0.3952,
      "step": 9500
    },
    {
      "epoch": 3.6101083032490973,
      "grad_norm": 11.254490852355957,
      "learning_rate": 2.924187725631769e-06,
      "loss": 0.4007,
      "step": 10000
    },
    {
      "epoch": 3.7906137184115525,
      "grad_norm": 29.451414108276367,
      "learning_rate": 1.5703971119133576e-06,
      "loss": 0.3962,
      "step": 10500
    },
    {
      "epoch": 3.9711191335740073,
      "grad_norm": 16.022735595703125,
      "learning_rate": 2.1660649819494586e-07,
      "loss": 0.3853,
      "step": 11000
    },
    {
      "epoch": 4.0,
      "step": 11080,
      "total_flos": 1.0399493167607808e+16,
      "train_loss": 0.6484355885199261,
      "train_runtime": 8433.1347,
      "train_samples_per_second": 42.043,
      "train_steps_per_second": 1.314
    }
  ],
  "logging_steps": 500,
  "max_steps": 11080,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0399493167607808e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}