|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.994219653179191, |
|
"eval_steps": 500, |
|
"global_step": 777, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9999666208982518e-05, |
|
"loss": 28.454, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9987985862949325e-05, |
|
"loss": 8.849, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9959638242644855e-05, |
|
"loss": 8.2489, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.991467065265775e-05, |
|
"loss": 4.2986, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.985315813185629e-05, |
|
"loss": 4.8588, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9775203328168643e-05, |
|
"loss": 9.355, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.9680936327290924e-05, |
|
"loss": 5.7211, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.957051443560902e-05, |
|
"loss": 10.0795, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.9444121917696335e-05, |
|
"loss": 3.6404, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.930196968882556e-05, |
|
"loss": 30.7862, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.9144294963007542e-05, |
|
"loss": 6.9002, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.8971360857144616e-05, |
|
"loss": 8.5785, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.8783455951958948e-05, |
|
"loss": 5.644, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.8580893810428562e-05, |
|
"loss": 14.8062, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.8364012454534687e-05, |
|
"loss": 14.919, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.813317380119356e-05, |
|
"loss": 6.5775, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.7888763058314016e-05, |
|
"loss": 6.5815, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.763118808198859e-05, |
|
"loss": 4.1731, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.736087869589092e-05, |
|
"loss": 9.1267, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.7078285974015103e-05, |
|
"loss": 3.9908, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.678388148795397e-05, |
|
"loss": 5.6814, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.6478156519972354e-05, |
|
"loss": 5.0232, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.6161621243188528e-05, |
|
"loss": 3.5724, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.5834803870231846e-05, |
|
"loss": 11.7568, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.549824977179731e-05, |
|
"loss": 5.393, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.5152520566567873e-05, |
|
"loss": 6.7241, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 1.4798193184023233e-05, |
|
"loss": 3.7898, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.4435858901698995e-05, |
|
"loss": 15.7335, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.4066122358502772e-05, |
|
"loss": 2.6745, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.3689600545733713e-05, |
|
"loss": 5.8789, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.330692177748925e-05, |
|
"loss": 2.7451, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.2918724642177054e-05, |
|
"loss": 3.0655, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.252565693688198e-05, |
|
"loss": 1.6089, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.2128374586366159e-05, |
|
"loss": 9.6673, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.172754054850619e-05, |
|
"loss": 1.9807, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.1323823707993937e-05, |
|
"loss": 0.7278, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.091789776014706e-05, |
|
"loss": 5.7635, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0510440086691911e-05, |
|
"loss": 3.151, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.0102130625394776e-05, |
|
"loss": 16.2403, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 9.693650735427808e-06, |
|
"loss": 2.06, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 9.285682060362974e-06, |
|
"loss": 3.2416, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.878905390691437e-06, |
|
"loss": 1.7038, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.473999527766503e-06, |
|
"loss": 1.4826, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.071640151065902e-06, |
|
"loss": 5.8947, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.672498690663632e-06, |
|
"loss": 6.4248, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 7.277241206792944e-06, |
|
"loss": 4.6138, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 6.886527278370131e-06, |
|
"loss": 2.0887, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 6.501008902333912e-06, |
|
"loss": 3.9643, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 6.121329405637111e-06, |
|
"loss": 4.3215, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5.748122371706198e-06, |
|
"loss": 3.4914, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 5.382010583160201e-06, |
|
"loss": 13.7674, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5.0236049825532355e-06, |
|
"loss": 3.2402, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 4.673503652874977e-06, |
|
"loss": 0.7369, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 4.33229081951025e-06, |
|
"loss": 0.707, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 4.000535875323307e-06, |
|
"loss": 1.7793, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.6787924304935696e-06, |
|
"loss": 0.5325, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 3.3675973886884506e-06, |
|
"loss": 0.4873, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 3.0674700511149057e-06, |
|
"loss": 0.3436, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 2.7789112499447312e-06, |
|
"loss": 0.3319, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.502402512559773e-06, |
|
"loss": 0.4382, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 2.2384052580116465e-06, |
|
"loss": 0.2655, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.9873600270368664e-06, |
|
"loss": 1.3714, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 1.749685746912323e-06, |
|
"loss": 0.1256, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.52577903237781e-06, |
|
"loss": 0.2344, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.3160135237922011e-06, |
|
"loss": 0.4926, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.1207392636277502e-06, |
|
"loss": 0.369, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 9.402821123429017e-07, |
|
"loss": 0.1464, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 7.749432046084471e-07, |
|
"loss": 0.5273, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 6.249984467943737e-07, |
|
"loss": 0.4998, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.906980565560004e-07, |
|
"loss": 2.2237, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.722661452877163e-07, |
|
"loss": 0.2594, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 2.699003441410508e-07, |
|
"loss": 0.3388, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.8377147423120467e-07, |
|
"loss": 0.4497, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.1402326158234e-07, |
|
"loss": 0.3246, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 6.077209728732492e-08, |
|
"loss": 2.0268, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.4106843282165615e-08, |
|
"loss": 0.1469, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.088684059220249e-09, |
|
"loss": 0.5057, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"step": 777, |
|
"total_flos": 1.0509820178622054e+17, |
|
"train_loss": 4.876755037362972, |
|
"train_runtime": 2316.9087, |
|
"train_samples_per_second": 21.497, |
|
"train_steps_per_second": 0.335 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 777, |
|
"num_train_epochs": 3, |
|
"save_steps": 300, |
|
"total_flos": 1.0509820178622054e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|