|
{ |
|
"best_metric": 2.1074936389923096, |
|
"best_model_checkpoint": "output/ot-rus/checkpoint-188", |
|
"epoch": 2.0, |
|
"global_step": 188, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00013622380795859552, |
|
"loss": 2.8695, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00013332301465950465, |
|
"loss": 2.5088, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00012858017786751834, |
|
"loss": 2.4316, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001221302806593135, |
|
"loss": 2.4985, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001141568897498326, |
|
"loss": 2.414, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00010488693110842961, |
|
"loss": 2.3927, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.458423155288552e-05, |
|
"loss": 2.3658, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 8.35420101301712e-05, |
|
"loss": 2.3367, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 7.207453298233569e-05, |
|
"loss": 2.4211, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 6.0508169203451974e-05, |
|
"loss": 2.3437, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.9172102241593944e-05, |
|
"loss": 2.336, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.838896120314628e-05, |
|
"loss": 2.1511, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.8465638696361884e-05, |
|
"loss": 2.4044, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.9684556542098295e-05, |
|
"loss": 2.1521, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.2295627933180862e-05, |
|
"loss": 2.2615, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6.50914480262796e-06, |
|
"loss": 2.1909, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.4897928291500582e-06, |
|
"loss": 2.1576, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.519644153159924e-07, |
|
"loss": 2.2327, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.1647870540618896, |
|
"eval_runtime": 5.4363, |
|
"eval_samples_per_second": 23.177, |
|
"eval_steps_per_second": 2.943, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.152095079421997, |
|
"eval_runtime": 5.3828, |
|
"eval_samples_per_second": 23.036, |
|
"eval_steps_per_second": 2.972, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.830872810453363e-08, |
|
"loss": 2.1472, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.3746270344901413e-06, |
|
"loss": 2.2033, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.58381008254605e-06, |
|
"loss": 2.1682, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.576451662754438e-06, |
|
"loss": 2.1479, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.6213459316852997e-05, |
|
"loss": 2.2949, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.4309929383066146e-05, |
|
"loss": 2.2226, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.3640298318194444e-05, |
|
"loss": 2.1933, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.3944626783346644e-05, |
|
"loss": 2.1917, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5.493584142187686e-05, |
|
"loss": 2.2443, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 6.630773257727353e-05, |
|
"loss": 2.2132, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 7.774348513864122e-05, |
|
"loss": 2.2304, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.892450484875447e-05, |
|
"loss": 2.0878, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 9.953929417822461e-05, |
|
"loss": 2.2156, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00010929213048843373, |
|
"loss": 2.165, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00011791130471402592, |
|
"loss": 2.0622, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012515669103944476, |
|
"loss": 2.2076, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00013082643668217578, |
|
"loss": 2.2435, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00013476258540873022, |
|
"loss": 2.1772, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00013685547811507137, |
|
"loss": 2.1434, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.1074936389923096, |
|
"eval_runtime": 5.358, |
|
"eval_samples_per_second": 23.143, |
|
"eval_steps_per_second": 2.986, |
|
"step": 188 |
|
} |
|
], |
|
"max_steps": 188, |
|
"num_train_epochs": 2, |
|
"total_flos": 194793209856000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|