|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.1646223369916076, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.9875000000000002e-05, |
|
"loss": 1.9446, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.9736111111111115e-05, |
|
"loss": 1.7941, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9597222222222224e-05, |
|
"loss": 1.5361, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.9458333333333333e-05, |
|
"loss": 1.1041, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 1.9319444444444446e-05, |
|
"loss": 0.6053, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 1.918055555555556e-05, |
|
"loss": 0.4088, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 1.9041666666666668e-05, |
|
"loss": 0.3813, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 1.890277777777778e-05, |
|
"loss": 0.3733, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.876388888888889e-05, |
|
"loss": 0.3678, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.8625000000000002e-05, |
|
"loss": 0.3604, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.8486111111111115e-05, |
|
"loss": 0.3368, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 1.8347222222222224e-05, |
|
"loss": 0.3356, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 1.8208333333333333e-05, |
|
"loss": 0.3439, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.8069444444444446e-05, |
|
"loss": 0.3277, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.793055555555556e-05, |
|
"loss": 0.3288, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.7791666666666668e-05, |
|
"loss": 0.3227, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.765277777777778e-05, |
|
"loss": 0.3213, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.751388888888889e-05, |
|
"loss": 0.3128, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.7375000000000002e-05, |
|
"loss": 0.3071, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 1.7236111111111115e-05, |
|
"loss": 0.3022, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.7097222222222224e-05, |
|
"loss": 0.2968, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.6958333333333333e-05, |
|
"loss": 0.3023, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1.6819444444444446e-05, |
|
"loss": 0.2889, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 1.668055555555556e-05, |
|
"loss": 0.2763, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 1.6541666666666668e-05, |
|
"loss": 0.2778, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 2880, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 1.584297707925504e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|