{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "global_step": 560,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.09, "learning_rate": 0.00013451892828543385, "loss": 3.9488, "step": 5 },
    { "epoch": 0.18, "learning_rate": 0.00012668528006706028, "loss": 3.8298, "step": 10 },
    { "epoch": 0.27, "learning_rate": 0.00011431137524750748, "loss": 3.7557, "step": 15 },
    { "epoch": 0.36, "learning_rate": 9.836442450346448e-05, "loss": 3.5311, "step": 20 },
    { "epoch": 0.45, "learning_rate": 8.009092691870492e-05, "loss": 3.3526, "step": 25 },
    { "epoch": 0.54, "learning_rate": 6.0919236939313083e-05, "loss": 3.5934, "step": 30 },
    { "epoch": 0.62, "learning_rate": 4.2347916539754844e-05, "loss": 3.3983, "step": 35 },
    { "epoch": 0.71, "learning_rate": 2.5828599592490882e-05, "loss": 3.6609, "step": 40 },
    { "epoch": 0.8, "learning_rate": 1.2652524389394753e-05, "loss": 3.4898, "step": 45 },
    { "epoch": 0.89, "learning_rate": 3.849603540845984e-06, "loss": 3.5749, "step": 50 },
    { "epoch": 0.98, "learning_rate": 1.0792048977778093e-07, "loss": 3.4588, "step": 55 },
    { "epoch": 1.07, "learning_rate": 1.7199452243268996e-06, "loss": 3.2536, "step": 60 },
    { "epoch": 1.16, "learning_rate": 8.559673257059505e-06, "loss": 3.2439, "step": 65 },
    { "epoch": 1.25, "learning_rate": 2.009247481060283e-05, "loss": 3.2968, "step": 70 },
    { "epoch": 1.34, "learning_rate": 3.541688434458052e-05, "loss": 3.4346, "step": 75 },
    { "epoch": 1.43, "learning_rate": 5.333506393059682e-05, "loss": 3.3563, "step": 80 },
    { "epoch": 1.52, "learning_rate": 7.244643268047132e-05, "loss": 3.2696, "step": 85 },
    { "epoch": 1.61, "learning_rate": 9.125714365012444e-05, "loss": 3.4046, "step": 90 },
    { "epoch": 1.7, "learning_rate": 0.00010829685091793463, "loss": 3.4708, "step": 95 },
    { "epoch": 1.79, "learning_rate": 0.00012223363969730684, "loss": 3.2387, "step": 100 },
    { "epoch": 1.88, "learning_rate": 0.00013197813593027427, "loss": 3.3163, "step": 105 },
    { "epoch": 1.96, "learning_rate": 0.00013676865759867644, "loss": 3.2581, "step": 110 },
    { "epoch": 2.05, "learning_rate": 0.000136230751870351, "loss": 3.1941, "step": 115 },
    { "epoch": 2.14, "learning_rate": 0.00013040646433810595, "loss": 3.0392, "step": 120 },
    { "epoch": 2.23, "learning_rate": 0.00011975105251098516, "loss": 3.0188, "step": 125 },
    { "epoch": 2.32, "learning_rate": 0.00010509740044895205, "loss": 3.1013, "step": 130 },
    { "epoch": 2.41, "learning_rate": 8.759091608374473e-05, "loss": 3.0585, "step": 135 },
    { "epoch": 2.5, "learning_rate": 6.860000000000001e-05, "loss": 3.1302, "step": 140 },
    { "epoch": 2.59, "learning_rate": 4.9609083916255386e-05, "loss": 3.2358, "step": 145 },
    { "epoch": 2.68, "learning_rate": 3.210259955104798e-05, "loss": 3.119, "step": 150 },
    { "epoch": 2.77, "learning_rate": 1.744894748901483e-05, "loss": 2.9946, "step": 155 },
    { "epoch": 2.86, "learning_rate": 6.793535661894062e-06, "loss": 3.0184, "step": 160 },
    { "epoch": 2.95, "learning_rate": 9.692481296490106e-07, "loss": 2.9798, "step": 165 },
    { "epoch": 3.04, "learning_rate": 4.313424013235498e-07, "loss": 3.1282, "step": 170 },
    { "epoch": 3.12, "learning_rate": 5.22186406972573e-06, "loss": 3.0772, "step": 175 },
    { "epoch": 3.21, "learning_rate": 1.496636030269314e-05, "loss": 2.8216, "step": 180 },
    { "epoch": 3.3, "learning_rate": 2.890314908206528e-05, "loss": 2.7665, "step": 185 },
    { "epoch": 3.39, "learning_rate": 4.594285634987545e-05, "loss": 3.0073, "step": 190 },
    { "epoch": 3.48, "learning_rate": 6.475356731952864e-05, "loss": 3.0372, "step": 195 },
    { "epoch": 3.57, "learning_rate": 8.386493606940314e-05, "loss": 2.807, "step": 200 },
    { "epoch": 3.66, "learning_rate": 0.0001017831156554194, "loss": 3.1058, "step": 205 },
    { "epoch": 3.75, "learning_rate": 0.0001171075251893971, "loss": 2.961, "step": 210 },
    { "epoch": 3.84, "learning_rate": 0.0001286403267429405, "loss": 3.1032, "step": 215 },
    { "epoch": 3.93, "learning_rate": 0.0001354800547756731, "loss": 2.7667, "step": 220 },
    { "epoch": 4.02, "learning_rate": 0.00013709207951022223, "loss": 3.0024, "step": 225 },
    { "epoch": 4.11, "learning_rate": 0.00013335039645915404, "loss": 2.8538, "step": 230 },
    { "epoch": 4.2, "learning_rate": 0.00012454747561060531, "loss": 2.8202, "step": 235 },
    { "epoch": 4.29, "learning_rate": 0.00011137140040750914, "loss": 2.6845, "step": 240 },
    { "epoch": 4.38, "learning_rate": 9.485208346024522e-05, "loss": 2.6865, "step": 245 },
    { "epoch": 4.46, "learning_rate": 7.62807630606869e-05, "loss": 2.8686, "step": 250 },
    { "epoch": 4.55, "learning_rate": 5.710907308129509e-05, "loss": 2.9936, "step": 255 },
    { "epoch": 4.64, "learning_rate": 3.883557549653544e-05, "loss": 2.5979, "step": 260 },
    { "epoch": 4.73, "learning_rate": 2.2888624752492583e-05, "loss": 2.7179, "step": 265 },
    { "epoch": 4.82, "learning_rate": 1.0514719932939762e-05, "loss": 2.9387, "step": 270 },
    { "epoch": 4.91, "learning_rate": 2.681071714566175e-06, "loss": 2.6822, "step": 275 },
    { "epoch": 5.0, "learning_rate": 0.0, "loss": 2.7684, "step": 280 },
    { "epoch": 5.09, "learning_rate": 2.6810717145661523e-06, "loss": 2.6722, "step": 285 },
    { "epoch": 5.18, "learning_rate": 1.0514719932939649e-05, "loss": 2.597, "step": 290 },
    { "epoch": 5.27, "learning_rate": 2.2888624752492607e-05, "loss": 2.7343, "step": 295 },
    { "epoch": 5.36, "learning_rate": 3.8835575496535365e-05, "loss": 2.567, "step": 300 },
    { "epoch": 5.45, "learning_rate": 5.7109073081294886e-05, "loss": 2.6375, "step": 305 },
    { "epoch": 5.54, "learning_rate": 7.628076306068694e-05, "loss": 2.734, "step": 310 },
    { "epoch": 5.62, "learning_rate": 9.485208346024515e-05, "loss": 2.6448, "step": 315 },
    { "epoch": 5.71, "learning_rate": 0.00011137140040750908, "loss": 2.6255, "step": 320 },
    { "epoch": 5.8, "learning_rate": 0.0001245474756106052, "loss": 2.6455, "step": 325 },
    { "epoch": 5.89, "learning_rate": 0.00013335039645915407, "loss": 2.5969, "step": 330 },
    { "epoch": 5.98, "learning_rate": 0.00013709207951022223, "loss": 2.6923, "step": 335 },
    { "epoch": 6.07, "learning_rate": 0.00013548005477567314, "loss": 2.3761, "step": 340 },
    { "epoch": 6.16, "learning_rate": 0.00012864032674294047, "loss": 2.4563, "step": 345 },
    { "epoch": 6.25, "learning_rate": 0.00011710752518939715, "loss": 2.4791, "step": 350 },
    { "epoch": 6.34, "learning_rate": 0.00010178311565541947, "loss": 2.446, "step": 355 },
    { "epoch": 6.43, "learning_rate": 8.386493606940322e-05, "loss": 2.5515, "step": 360 },
    { "epoch": 6.52, "learning_rate": 6.475356731952872e-05, "loss": 2.5469, "step": 365 },
    { "epoch": 6.61, "learning_rate": 4.594285634987565e-05, "loss": 2.6391, "step": 370 },
    { "epoch": 6.7, "learning_rate": 2.890314908206545e-05, "loss": 2.36, "step": 375 },
    { "epoch": 6.79, "learning_rate": 1.496636030269327e-05, "loss": 2.4806, "step": 380 },
    { "epoch": 6.88, "learning_rate": 5.221864069725715e-06, "loss": 2.6083, "step": 385 },
    { "epoch": 6.96, "learning_rate": 4.3134240132355735e-07, "loss": 2.6457, "step": 390 },
    { "epoch": 7.05, "learning_rate": 9.692481296490106e-07, "loss": 2.4165, "step": 395 },
    { "epoch": 7.14, "learning_rate": 6.793535661894024e-06, "loss": 2.301, "step": 400 },
    { "epoch": 7.23, "learning_rate": 1.744894748901478e-05, "loss": 2.4478, "step": 405 },
    { "epoch": 7.32, "learning_rate": 3.2102599551047805e-05, "loss": 2.3692, "step": 410 },
    { "epoch": 7.41, "learning_rate": 4.960908391625518e-05, "loss": 2.3269, "step": 415 },
    { "epoch": 7.5, "learning_rate": 6.859999999999982e-05, "loss": 2.1706, "step": 420 },
    { "epoch": 7.59, "learning_rate": 8.759091608374469e-05, "loss": 2.3618, "step": 425 },
    { "epoch": 7.68, "learning_rate": 0.00010509740044895209, "loss": 2.284, "step": 430 },
    { "epoch": 7.77, "learning_rate": 0.00011975105251098514, "loss": 2.3587, "step": 435 },
    { "epoch": 7.86, "learning_rate": 0.00013040646433810593, "loss": 2.4467, "step": 440 },
    { "epoch": 7.95, "learning_rate": 0.000136230751870351, "loss": 2.5326, "step": 445 },
    { "epoch": 8.04, "learning_rate": 0.00013676865759867642, "loss": 2.3045, "step": 450 },
    { "epoch": 8.12, "learning_rate": 0.00013197813593027432, "loss": 2.1819, "step": 455 },
    { "epoch": 8.21, "learning_rate": 0.00012223363969730697, "loss": 2.2893, "step": 460 },
    { "epoch": 8.3, "learning_rate": 0.00010829685091793466, "loss": 2.3117, "step": 465 },
    { "epoch": 8.39, "learning_rate": 9.12571436501247e-05, "loss": 2.272, "step": 470 },
    { "epoch": 8.48, "learning_rate": 7.24464326804714e-05, "loss": 2.3461, "step": 475 },
    { "epoch": 8.57, "learning_rate": 5.33350639305969e-05, "loss": 2.1348, "step": 480 },
    { "epoch": 8.66, "learning_rate": 3.541688434458043e-05, "loss": 2.2985, "step": 485 },
    { "epoch": 8.75, "learning_rate": 2.0092474810602934e-05, "loss": 2.077, "step": 490 },
    { "epoch": 8.84, "learning_rate": 8.559673257059573e-06, "loss": 2.2565, "step": 495 },
    { "epoch": 8.93, "learning_rate": 1.719945224326892e-06, "loss": 2.1992, "step": 500 },
    { "epoch": 9.02, "learning_rate": 1.0792048977777332e-07, "loss": 2.1455, "step": 505 },
    { "epoch": 9.11, "learning_rate": 3.849603540845977e-06, "loss": 2.1314, "step": 510 },
    { "epoch": 9.2, "learning_rate": 1.2652524389394722e-05, "loss": 1.9046, "step": 515 },
    { "epoch": 9.29, "learning_rate": 2.582859959249101e-05, "loss": 2.0235, "step": 520 },
    { "epoch": 9.38, "learning_rate": 4.234791653975475e-05, "loss": 2.0746, "step": 525 },
    { "epoch": 9.46, "learning_rate": 6.091923693931295e-05, "loss": 2.0545, "step": 530 },
    { "epoch": 9.55, "learning_rate": 8.0090926918705e-05, "loss": 2.0609, "step": 535 },
    { "epoch": 9.64, "learning_rate": 9.83644245034643e-05, "loss": 2.1458, "step": 540 },
    { "epoch": 9.73, "learning_rate": 0.00011431137524750748, "loss": 1.9214, "step": 545 },
    { "epoch": 9.82, "learning_rate": 0.00012668528006706028, "loss": 2.2293, "step": 550 },
    { "epoch": 9.91, "learning_rate": 0.00013451892828543387, "loss": 2.2592, "step": 555 },
    { "epoch": 10.0, "learning_rate": 0.0001372, "loss": 2.1707, "step": 560 }
  ],
  "max_steps": 560,
  "num_train_epochs": 10,
  "total_flos": 578761850880000.0,
  "trial_name": null,
  "trial_params": null
}