player1537's picture
Training in progress, step 7776
7268143
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.8997917148808147,
"global_step": 7776,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9997685720897943e-05,
"loss": 2.316,
"step": 1
},
{
"epoch": 0.02,
"learning_rate": 1.969914371673224e-05,
"loss": 2.907,
"step": 131
},
{
"epoch": 0.03,
"learning_rate": 1.9398287433464477e-05,
"loss": 2.8073,
"step": 262
},
{
"epoch": 0.05,
"learning_rate": 1.9095116871094656e-05,
"loss": 2.7381,
"step": 393
},
{
"epoch": 0.06,
"learning_rate": 1.8791946308724832e-05,
"loss": 2.6726,
"step": 524
},
{
"epoch": 0.08,
"learning_rate": 1.848877574635501e-05,
"loss": 2.5643,
"step": 655
},
{
"epoch": 0.09,
"learning_rate": 1.818560518398519e-05,
"loss": 2.4772,
"step": 786
},
{
"epoch": 0.11,
"learning_rate": 1.788243462161537e-05,
"loss": 2.4839,
"step": 917
},
{
"epoch": 0.12,
"learning_rate": 1.7579264059245548e-05,
"loss": 2.5131,
"step": 1048
},
{
"epoch": 0.14,
"learning_rate": 1.7276093496875727e-05,
"loss": 2.4251,
"step": 1179
},
{
"epoch": 0.15,
"learning_rate": 1.6972922934505902e-05,
"loss": 2.4532,
"step": 1310
},
{
"epoch": 0.17,
"learning_rate": 1.667206665123814e-05,
"loss": 2.4152,
"step": 1441
},
{
"epoch": 0.18,
"learning_rate": 1.636889608886832e-05,
"loss": 2.419,
"step": 1572
},
{
"epoch": 0.2,
"learning_rate": 1.6065725526498498e-05,
"loss": 2.4077,
"step": 1703
},
{
"epoch": 0.21,
"learning_rate": 1.5762554964128677e-05,
"loss": 2.4091,
"step": 1834
},
{
"epoch": 0.23,
"learning_rate": 1.5459384401758853e-05,
"loss": 2.4614,
"step": 1965
},
{
"epoch": 0.24,
"learning_rate": 1.5156213839389032e-05,
"loss": 2.3945,
"step": 2096
},
{
"epoch": 0.26,
"learning_rate": 1.485304327701921e-05,
"loss": 2.396,
"step": 2227
},
{
"epoch": 0.27,
"learning_rate": 1.454987271464939e-05,
"loss": 2.4044,
"step": 2358
},
{
"epoch": 0.29,
"learning_rate": 1.4246702152279565e-05,
"loss": 2.3655,
"step": 2489
},
{
"epoch": 0.3,
"learning_rate": 1.3943531589909744e-05,
"loss": 2.4157,
"step": 2620
},
{
"epoch": 0.32,
"learning_rate": 1.3640361027539923e-05,
"loss": 2.3632,
"step": 2751
},
{
"epoch": 0.33,
"learning_rate": 1.33371904651701e-05,
"loss": 2.3461,
"step": 2882
},
{
"epoch": 0.35,
"learning_rate": 1.303401990280028e-05,
"loss": 2.3431,
"step": 3013
},
{
"epoch": 0.36,
"learning_rate": 1.2730849340430457e-05,
"loss": 2.3715,
"step": 3144
},
{
"epoch": 0.38,
"learning_rate": 1.2429993057162695e-05,
"loss": 2.3903,
"step": 3275
},
{
"epoch": 0.39,
"learning_rate": 1.2126822494792874e-05,
"loss": 2.3352,
"step": 3406
},
{
"epoch": 0.41,
"learning_rate": 1.1823651932423051e-05,
"loss": 2.4255,
"step": 3537
},
{
"epoch": 0.42,
"learning_rate": 1.1520481370053228e-05,
"loss": 2.3413,
"step": 3668
},
{
"epoch": 0.44,
"learning_rate": 1.1217310807683407e-05,
"loss": 2.3762,
"step": 3799
},
{
"epoch": 0.45,
"learning_rate": 1.0914140245313585e-05,
"loss": 2.3598,
"step": 3930
},
{
"epoch": 0.47,
"learning_rate": 1.0610969682943764e-05,
"loss": 2.3641,
"step": 4061
},
{
"epoch": 0.49,
"learning_rate": 1.0307799120573943e-05,
"loss": 2.3373,
"step": 4192
},
{
"epoch": 0.5,
"learning_rate": 1.0004628558204118e-05,
"loss": 2.3432,
"step": 4323
},
{
"epoch": 0.52,
"learning_rate": 9.701457995834297e-06,
"loss": 2.3279,
"step": 4454
},
{
"epoch": 0.53,
"learning_rate": 9.398287433464476e-06,
"loss": 2.358,
"step": 4585
},
{
"epoch": 0.55,
"learning_rate": 9.095116871094655e-06,
"loss": 2.3604,
"step": 4716
},
{
"epoch": 0.56,
"learning_rate": 8.791946308724833e-06,
"loss": 2.3315,
"step": 4847
},
{
"epoch": 0.58,
"learning_rate": 8.488775746355012e-06,
"loss": 2.3226,
"step": 4978
},
{
"epoch": 0.59,
"learning_rate": 8.185605183985189e-06,
"loss": 2.3531,
"step": 5109
},
{
"epoch": 0.61,
"learning_rate": 7.882434621615368e-06,
"loss": 2.3453,
"step": 5240
},
{
"epoch": 0.62,
"learning_rate": 7.579264059245546e-06,
"loss": 2.3601,
"step": 5371
},
{
"epoch": 0.64,
"learning_rate": 7.2760934968757234e-06,
"loss": 2.3013,
"step": 5502
},
{
"epoch": 0.65,
"learning_rate": 6.9729229345059024e-06,
"loss": 2.3023,
"step": 5633
},
{
"epoch": 0.67,
"learning_rate": 6.669752372136081e-06,
"loss": 2.3045,
"step": 5764
},
{
"epoch": 0.68,
"learning_rate": 6.366581809766258e-06,
"loss": 2.3253,
"step": 5895
},
{
"epoch": 0.7,
"learning_rate": 6.063411247396437e-06,
"loss": 2.3111,
"step": 6026
},
{
"epoch": 0.71,
"learning_rate": 5.760240685026614e-06,
"loss": 2.3328,
"step": 6157
},
{
"epoch": 0.73,
"learning_rate": 5.457070122656792e-06,
"loss": 2.3009,
"step": 6288
},
{
"epoch": 0.75,
"learning_rate": 5.017357093265448e-06,
"loss": 2.3449,
"step": 6480
},
{
"epoch": 0.82,
"learning_rate": 3.520018514232817e-06,
"loss": 2.3381,
"step": 7128
},
{
"epoch": 0.9,
"learning_rate": 2.0203656560981255e-06,
"loss": 2.3258,
"step": 7776
}
],
"max_steps": 8642,
"num_train_epochs": 1,
"total_flos": 1.4480823419928576e+16,
"trial_name": null,
"trial_params": null
}