{
    "best_metric": null,
    "best_model_checkpoint": null,
    "epoch": 0.9778612219353829,
    "global_step": 12500,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.04,
            "learning_rate": 4.804427755612923e-05,
            "loss": 5.6625,
            "step": 500
        },
        {
            "epoch": 0.08,
            "learning_rate": 4.608855511225847e-05,
            "loss": 5.0889,
            "step": 1000
        },
        {
            "epoch": 0.12,
            "learning_rate": 4.41328326683877e-05,
            "loss": 4.7108,
            "step": 1500
        },
        {
            "epoch": 0.16,
            "learning_rate": 4.217711022451694e-05,
            "loss": 4.5723,
            "step": 2000
        },
        {
            "epoch": 0.2,
            "learning_rate": 4.0221387780646176e-05,
            "loss": 4.3708,
            "step": 2500
        },
        {
            "epoch": 0.23,
            "learning_rate": 3.826566533677541e-05,
            "loss": 4.2174,
            "step": 3000
        },
        {
            "epoch": 0.27,
            "learning_rate": 3.6309942892904644e-05,
            "loss": 4.1822,
            "step": 3500
        },
        {
            "epoch": 0.31,
            "learning_rate": 3.4354220449033875e-05,
            "loss": 4.1232,
            "step": 4000
        },
        {
            "epoch": 0.35,
            "learning_rate": 3.2398498005163106e-05,
            "loss": 3.9563,
            "step": 4500
        },
        {
            "epoch": 0.39,
            "learning_rate": 3.0442775561292343e-05,
            "loss": 3.9201,
            "step": 5000
        },
        {
            "epoch": 0.43,
            "learning_rate": 2.8487053117421574e-05,
            "loss": 3.7911,
            "step": 5500
        },
        {
            "epoch": 0.47,
            "learning_rate": 2.6531330673550808e-05,
            "loss": 3.7768,
            "step": 6000
        },
        {
            "epoch": 0.51,
            "learning_rate": 2.4575608229680045e-05,
            "loss": 3.8178,
            "step": 6500
        },
        {
            "epoch": 0.55,
            "learning_rate": 2.2619885785809276e-05,
            "loss": 3.6894,
            "step": 7000
        },
        {
            "epoch": 0.59,
            "learning_rate": 2.0664163341938514e-05,
            "loss": 3.5751,
            "step": 7500
        },
        {
            "epoch": 0.63,
            "learning_rate": 1.8708440898067748e-05,
            "loss": 3.5514,
            "step": 8000
        },
        {
            "epoch": 0.66,
            "learning_rate": 1.6752718454196982e-05,
            "loss": 3.5458,
            "step": 8500
        },
        {
            "epoch": 0.7,
            "learning_rate": 1.4796996010326214e-05,
            "loss": 3.5009,
            "step": 9000
        },
        {
            "epoch": 0.74,
            "learning_rate": 1.284127356645545e-05,
            "loss": 3.4645,
            "step": 9500
        },
        {
            "epoch": 0.78,
            "learning_rate": 1.0885551122584684e-05,
            "loss": 3.5183,
            "step": 10000
        },
        {
            "epoch": 0.82,
            "learning_rate": 8.929828678713916e-06,
            "loss": 3.4722,
            "step": 10500
        },
        {
            "epoch": 0.86,
            "learning_rate": 6.974106234843151e-06,
            "loss": 3.4618,
            "step": 11000
        },
        {
            "epoch": 0.9,
            "learning_rate": 5.018383790972385e-06,
            "loss": 3.4238,
            "step": 11500
        },
        {
            "epoch": 0.94,
            "learning_rate": 3.0626613471016194e-06,
            "loss": 3.4192,
            "step": 12000
        },
        {
            "epoch": 0.98,
            "learning_rate": 1.1069389032308535e-06,
            "loss": 3.4197,
            "step": 12500
        }
    ],
    "max_steps": 12783,
    "num_train_epochs": 1,
    "total_flos": 974676043775232,
    "trial_name": null,
    "trial_params": null
}