|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 18.846153846153847, |
|
"eval_steps": 500, |
|
"global_step": 14040, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.2763532763532763e-06, |
|
"loss": 0.819, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 6.5527065527065525e-06, |
|
"loss": 0.8698, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 9.82905982905983e-06, |
|
"loss": 0.7667, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.3105413105413105e-05, |
|
"loss": 0.6645, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.6381766381766382e-05, |
|
"loss": 0.5342, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 1.965811965811966e-05, |
|
"loss": 0.5067, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.2934472934472936e-05, |
|
"loss": 0.434, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.621082621082621e-05, |
|
"loss": 0.4473, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.948717948717949e-05, |
|
"loss": 0.3901, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 3.2763532763532764e-05, |
|
"loss": 0.3859, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 3.603988603988604e-05, |
|
"loss": 0.3526, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.931623931623932e-05, |
|
"loss": 0.3339, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.343, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.586894586894587e-05, |
|
"loss": 0.2998, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 4.9145299145299147e-05, |
|
"loss": 0.2778, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 5.242165242165242e-05, |
|
"loss": 0.3029, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5.5698005698005694e-05, |
|
"loss": 0.2557, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5.897435897435898e-05, |
|
"loss": 0.2605, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 6.225071225071225e-05, |
|
"loss": 0.2566, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 6.552706552706553e-05, |
|
"loss": 0.2409, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 6.880341880341881e-05, |
|
"loss": 0.2518, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 7.207977207977208e-05, |
|
"loss": 0.279, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 7.535612535612536e-05, |
|
"loss": 0.2421, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.863247863247864e-05, |
|
"loss": 0.2237, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 8.190883190883192e-05, |
|
"loss": 0.2057, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 8.518518518518518e-05, |
|
"loss": 0.2496, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 8.846153846153847e-05, |
|
"loss": 0.2397, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 9.173789173789175e-05, |
|
"loss": 0.2322, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 9.501424501424501e-05, |
|
"loss": 0.2046, |
|
"step": 1334 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 9.829059829059829e-05, |
|
"loss": 0.2308, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 9.982589427033872e-05, |
|
"loss": 0.2052, |
|
"step": 1426 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 9.946185501741058e-05, |
|
"loss": 0.1759, |
|
"step": 1472 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 9.909781576448244e-05, |
|
"loss": 0.2116, |
|
"step": 1518 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 9.87337765115543e-05, |
|
"loss": 0.1603, |
|
"step": 1564 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 9.836973725862614e-05, |
|
"loss": 0.1892, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 9.800569800569801e-05, |
|
"loss": 0.1656, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.764165875276987e-05, |
|
"loss": 0.181, |
|
"step": 1702 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 9.727761949984172e-05, |
|
"loss": 0.1727, |
|
"step": 1748 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 9.691358024691359e-05, |
|
"loss": 0.1555, |
|
"step": 1794 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 9.654954099398545e-05, |
|
"loss": 0.1152, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 9.61855017410573e-05, |
|
"loss": 0.1404, |
|
"step": 1886 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 9.582146248812915e-05, |
|
"loss": 0.1635, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 9.545742323520101e-05, |
|
"loss": 0.1429, |
|
"step": 1978 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.509338398227288e-05, |
|
"loss": 0.1369, |
|
"step": 2024 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 9.472934472934474e-05, |
|
"loss": 0.1482, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 9.436530547641659e-05, |
|
"loss": 0.1388, |
|
"step": 2116 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 9.400126622348845e-05, |
|
"loss": 0.1602, |
|
"step": 2162 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 9.36372269705603e-05, |
|
"loss": 0.1764, |
|
"step": 2208 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 9.327318771763217e-05, |
|
"loss": 0.16, |
|
"step": 2254 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 9.290914846470402e-05, |
|
"loss": 0.133, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 9.254510921177588e-05, |
|
"loss": 0.1226, |
|
"step": 2346 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 9.218106995884775e-05, |
|
"loss": 0.1062, |
|
"step": 2392 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 9.181703070591961e-05, |
|
"loss": 0.1184, |
|
"step": 2438 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 9.145299145299146e-05, |
|
"loss": 0.107, |
|
"step": 2484 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 9.108895220006331e-05, |
|
"loss": 0.1113, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 9.072491294713517e-05, |
|
"loss": 0.13, |
|
"step": 2576 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 9.036087369420702e-05, |
|
"loss": 0.1066, |
|
"step": 2622 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 8.999683444127889e-05, |
|
"loss": 0.1131, |
|
"step": 2668 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 8.963279518835075e-05, |
|
"loss": 0.1117, |
|
"step": 2714 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 8.926875593542262e-05, |
|
"loss": 0.1143, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 8.890471668249447e-05, |
|
"loss": 0.1041, |
|
"step": 2806 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 8.854067742956632e-05, |
|
"loss": 0.1041, |
|
"step": 2852 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 8.817663817663818e-05, |
|
"loss": 0.1205, |
|
"step": 2898 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 8.781259892371004e-05, |
|
"loss": 0.1214, |
|
"step": 2944 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 8.74485596707819e-05, |
|
"loss": 0.117, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 8.708452041785376e-05, |
|
"loss": 0.1115, |
|
"step": 3036 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 8.672048116492562e-05, |
|
"loss": 0.106, |
|
"step": 3082 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 8.635644191199747e-05, |
|
"loss": 0.0876, |
|
"step": 3128 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 8.599240265906932e-05, |
|
"loss": 0.0827, |
|
"step": 3174 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 8.562836340614119e-05, |
|
"loss": 0.0842, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 8.526432415321305e-05, |
|
"loss": 0.0933, |
|
"step": 3266 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 8.490028490028491e-05, |
|
"loss": 0.0908, |
|
"step": 3312 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 8.453624564735676e-05, |
|
"loss": 0.089, |
|
"step": 3358 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 8.417220639442863e-05, |
|
"loss": 0.084, |
|
"step": 3404 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.380816714150048e-05, |
|
"loss": 0.0745, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 8.344412788857233e-05, |
|
"loss": 0.0786, |
|
"step": 3496 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 8.308008863564419e-05, |
|
"loss": 0.077, |
|
"step": 3542 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 8.271604938271605e-05, |
|
"loss": 0.0882, |
|
"step": 3588 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 8.235201012978792e-05, |
|
"loss": 0.0902, |
|
"step": 3634 |
|
}, |
|
{ |
|
"epoch": 6.58, |
|
"learning_rate": 8.198797087685977e-05, |
|
"loss": 0.0879, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 8.162393162393163e-05, |
|
"loss": 0.0881, |
|
"step": 3726 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"learning_rate": 8.125989237100348e-05, |
|
"loss": 0.0955, |
|
"step": 3772 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 8.089585311807535e-05, |
|
"loss": 0.0917, |
|
"step": 3818 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 8.05318138651472e-05, |
|
"loss": 0.0851, |
|
"step": 3864 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 8.016777461221906e-05, |
|
"loss": 0.0638, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 7.980373535929092e-05, |
|
"loss": 0.0795, |
|
"step": 3956 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 7.943969610636279e-05, |
|
"loss": 0.0671, |
|
"step": 4002 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 7.907565685343464e-05, |
|
"loss": 0.0779, |
|
"step": 4048 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 7.871161760050649e-05, |
|
"loss": 0.0722, |
|
"step": 4094 |
|
}, |
|
{ |
|
"epoch": 6.26, |
|
"learning_rate": 7.834757834757835e-05, |
|
"loss": 0.0594, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"learning_rate": 7.79835390946502e-05, |
|
"loss": 0.0774, |
|
"step": 4186 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 7.761949984172207e-05, |
|
"loss": 0.0727, |
|
"step": 4232 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 7.725546058879393e-05, |
|
"loss": 0.0621, |
|
"step": 4278 |
|
}, |
|
{ |
|
"epoch": 7.05, |
|
"learning_rate": 7.68914213358658e-05, |
|
"loss": 0.0619, |
|
"step": 4324 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 7.652738208293764e-05, |
|
"loss": 0.0633, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"learning_rate": 7.61633428300095e-05, |
|
"loss": 0.0585, |
|
"step": 4416 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 7.579930357708136e-05, |
|
"loss": 0.0743, |
|
"step": 4462 |
|
}, |
|
{ |
|
"epoch": 7.83, |
|
"learning_rate": 7.543526432415322e-05, |
|
"loss": 0.0751, |
|
"step": 4508 |
|
}, |
|
{ |
|
"epoch": 8.03, |
|
"learning_rate": 7.507122507122507e-05, |
|
"loss": 0.0597, |
|
"step": 4554 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 7.470718581829694e-05, |
|
"loss": 0.0692, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 7.43431465653688e-05, |
|
"loss": 0.0595, |
|
"step": 4646 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 7.397910731244065e-05, |
|
"loss": 0.0505, |
|
"step": 4692 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 7.36150680595125e-05, |
|
"loss": 0.0516, |
|
"step": 4738 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 7.325102880658436e-05, |
|
"loss": 0.0577, |
|
"step": 4784 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 7.288698955365623e-05, |
|
"loss": 0.0546, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 7.252295030072809e-05, |
|
"loss": 0.0472, |
|
"step": 4876 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 7.215891104779994e-05, |
|
"loss": 0.0565, |
|
"step": 4922 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 7.17948717948718e-05, |
|
"loss": 0.0545, |
|
"step": 4968 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"learning_rate": 7.143083254194365e-05, |
|
"loss": 0.0399, |
|
"step": 5014 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 7.10667932890155e-05, |
|
"loss": 0.0536, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 7.070275403608737e-05, |
|
"loss": 0.0567, |
|
"step": 5106 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 7.033871478315923e-05, |
|
"loss": 0.0608, |
|
"step": 5152 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.99746755302311e-05, |
|
"loss": 0.0517, |
|
"step": 5198 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 6.961063627730295e-05, |
|
"loss": 0.0514, |
|
"step": 5244 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 6.924659702437481e-05, |
|
"loss": 0.0525, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 6.888255777144666e-05, |
|
"loss": 0.0554, |
|
"step": 5336 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 6.851851851851852e-05, |
|
"loss": 0.0696, |
|
"step": 5382 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 6.815447926559037e-05, |
|
"loss": 0.0358, |
|
"step": 5428 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 6.779044001266224e-05, |
|
"loss": 0.0413, |
|
"step": 5474 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 6.74264007597341e-05, |
|
"loss": 0.0358, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 6.706236150680597e-05, |
|
"loss": 0.0483, |
|
"step": 5566 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 6.669832225387782e-05, |
|
"loss": 0.039, |
|
"step": 5612 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 6.633428300094967e-05, |
|
"loss": 0.0381, |
|
"step": 5658 |
|
}, |
|
{ |
|
"epoch": 8.37, |
|
"learning_rate": 6.597024374802153e-05, |
|
"loss": 0.0502, |
|
"step": 5704 |
|
}, |
|
{ |
|
"epoch": 8.57, |
|
"learning_rate": 6.560620449509338e-05, |
|
"loss": 0.0421, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 6.524216524216524e-05, |
|
"loss": 0.0447, |
|
"step": 5796 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 6.487812598923711e-05, |
|
"loss": 0.0406, |
|
"step": 5842 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 6.451408673630897e-05, |
|
"loss": 0.0446, |
|
"step": 5888 |
|
}, |
|
{ |
|
"epoch": 9.35, |
|
"learning_rate": 6.415004748338082e-05, |
|
"loss": 0.0483, |
|
"step": 5934 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 6.378600823045267e-05, |
|
"loss": 0.0432, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 6.342196897752454e-05, |
|
"loss": 0.0483, |
|
"step": 6026 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"learning_rate": 6.30579297245964e-05, |
|
"loss": 0.0493, |
|
"step": 6072 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 6.269389047166825e-05, |
|
"loss": 0.0448, |
|
"step": 6118 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 6.232985121874011e-05, |
|
"loss": 0.037, |
|
"step": 6164 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"learning_rate": 6.196581196581198e-05, |
|
"loss": 0.0319, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 8.44, |
|
"learning_rate": 6.160177271288383e-05, |
|
"loss": 0.0341, |
|
"step": 6256 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"learning_rate": 6.123773345995568e-05, |
|
"loss": 0.0322, |
|
"step": 6302 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 6.087369420702754e-05, |
|
"loss": 0.0375, |
|
"step": 6348 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 6.0509654954099404e-05, |
|
"loss": 0.0291, |
|
"step": 6394 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 6.014561570117127e-05, |
|
"loss": 0.0322, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 5.978157644824312e-05, |
|
"loss": 0.0315, |
|
"step": 6486 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 5.9417537195314975e-05, |
|
"loss": 0.0341, |
|
"step": 6532 |
|
}, |
|
{ |
|
"epoch": 9.82, |
|
"learning_rate": 5.905349794238684e-05, |
|
"loss": 0.0314, |
|
"step": 6578 |
|
}, |
|
{ |
|
"epoch": 10.02, |
|
"learning_rate": 5.868945868945869e-05, |
|
"loss": 0.0338, |
|
"step": 6624 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 5.8325419436530546e-05, |
|
"loss": 0.0311, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 5.796138018360241e-05, |
|
"loss": 0.0335, |
|
"step": 6716 |
|
}, |
|
{ |
|
"epoch": 10.61, |
|
"learning_rate": 5.7597340930674274e-05, |
|
"loss": 0.0409, |
|
"step": 6762 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 5.7233301677746124e-05, |
|
"loss": 0.0332, |
|
"step": 6808 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 5.686926242481798e-05, |
|
"loss": 0.0456, |
|
"step": 6854 |
|
}, |
|
{ |
|
"epoch": 11.2, |
|
"learning_rate": 5.6505223171889845e-05, |
|
"loss": 0.0311, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 5.61411839189617e-05, |
|
"loss": 0.0331, |
|
"step": 6946 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 5.577714466603355e-05, |
|
"loss": 0.0234, |
|
"step": 6992 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 5.5413105413105416e-05, |
|
"loss": 0.0214, |
|
"step": 7038 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 5.504906616017728e-05, |
|
"loss": 0.0266, |
|
"step": 7084 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 5.4685026907249136e-05, |
|
"loss": 0.0279, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 5.4320987654320986e-05, |
|
"loss": 0.0256, |
|
"step": 7176 |
|
}, |
|
{ |
|
"epoch": 10.29, |
|
"learning_rate": 5.395694840139285e-05, |
|
"loss": 0.0256, |
|
"step": 7222 |
|
}, |
|
{ |
|
"epoch": 10.48, |
|
"learning_rate": 5.359290914846471e-05, |
|
"loss": 0.0277, |
|
"step": 7268 |
|
}, |
|
{ |
|
"epoch": 10.68, |
|
"learning_rate": 5.322886989553656e-05, |
|
"loss": 0.0237, |
|
"step": 7314 |
|
}, |
|
{ |
|
"epoch": 10.88, |
|
"learning_rate": 5.286483064260842e-05, |
|
"loss": 0.0286, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 11.07, |
|
"learning_rate": 5.2500791389680285e-05, |
|
"loss": 0.0285, |
|
"step": 7406 |
|
}, |
|
{ |
|
"epoch": 11.27, |
|
"learning_rate": 5.213675213675214e-05, |
|
"loss": 0.0325, |
|
"step": 7452 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 5.177271288382399e-05, |
|
"loss": 0.0345, |
|
"step": 7498 |
|
}, |
|
{ |
|
"epoch": 11.66, |
|
"learning_rate": 5.1408673630895856e-05, |
|
"loss": 0.0254, |
|
"step": 7544 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 5.104463437796771e-05, |
|
"loss": 0.0277, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 12.06, |
|
"learning_rate": 5.0680595125039577e-05, |
|
"loss": 0.0242, |
|
"step": 7636 |
|
}, |
|
{ |
|
"epoch": 12.25, |
|
"learning_rate": 5.031655587211143e-05, |
|
"loss": 0.0277, |
|
"step": 7682 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 4.995251661918329e-05, |
|
"loss": 0.0188, |
|
"step": 7728 |
|
}, |
|
{ |
|
"epoch": 10.36, |
|
"learning_rate": 4.958847736625515e-05, |
|
"loss": 0.0218, |
|
"step": 7774 |
|
}, |
|
{ |
|
"epoch": 10.56, |
|
"learning_rate": 4.9224438113327004e-05, |
|
"loss": 0.0244, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 4.886039886039887e-05, |
|
"loss": 0.0204, |
|
"step": 7866 |
|
}, |
|
{ |
|
"epoch": 10.95, |
|
"learning_rate": 4.849635960747072e-05, |
|
"loss": 0.0224, |
|
"step": 7912 |
|
}, |
|
{ |
|
"epoch": 11.15, |
|
"learning_rate": 4.813232035454258e-05, |
|
"loss": 0.0205, |
|
"step": 7958 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 4.776828110161444e-05, |
|
"loss": 0.027, |
|
"step": 8004 |
|
}, |
|
{ |
|
"epoch": 11.54, |
|
"learning_rate": 4.7404241848686296e-05, |
|
"loss": 0.0183, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 11.74, |
|
"learning_rate": 4.704020259575815e-05, |
|
"loss": 0.0199, |
|
"step": 8096 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 4.667616334283001e-05, |
|
"loss": 0.0227, |
|
"step": 8142 |
|
}, |
|
{ |
|
"epoch": 12.13, |
|
"learning_rate": 4.6312124089901874e-05, |
|
"loss": 0.0232, |
|
"step": 8188 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 4.5948084836973724e-05, |
|
"loss": 0.0221, |
|
"step": 8234 |
|
}, |
|
{ |
|
"epoch": 12.52, |
|
"learning_rate": 4.558404558404559e-05, |
|
"loss": 0.0207, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 12.72, |
|
"learning_rate": 4.5220006331117445e-05, |
|
"loss": 0.0213, |
|
"step": 8326 |
|
}, |
|
{ |
|
"epoch": 12.91, |
|
"learning_rate": 4.48559670781893e-05, |
|
"loss": 0.0201, |
|
"step": 8372 |
|
}, |
|
{ |
|
"epoch": 13.11, |
|
"learning_rate": 4.449192782526116e-05, |
|
"loss": 0.0196, |
|
"step": 8418 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 4.412788857233302e-05, |
|
"loss": 0.0197, |
|
"step": 8464 |
|
}, |
|
{ |
|
"epoch": 11.22, |
|
"learning_rate": 4.376384931940488e-05, |
|
"loss": 0.0154, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 11.41, |
|
"learning_rate": 4.3399810066476736e-05, |
|
"loss": 0.0177, |
|
"step": 8556 |
|
}, |
|
{ |
|
"epoch": 11.61, |
|
"learning_rate": 4.303577081354859e-05, |
|
"loss": 0.0158, |
|
"step": 8602 |
|
}, |
|
{ |
|
"epoch": 11.81, |
|
"learning_rate": 4.267173156062046e-05, |
|
"loss": 0.0158, |
|
"step": 8648 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 4.230769230769231e-05, |
|
"loss": 0.0163, |
|
"step": 8694 |
|
}, |
|
{ |
|
"epoch": 12.2, |
|
"learning_rate": 4.194365305476417e-05, |
|
"loss": 0.0184, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 4.157961380183603e-05, |
|
"loss": 0.016, |
|
"step": 8786 |
|
}, |
|
{ |
|
"epoch": 12.59, |
|
"learning_rate": 4.1215574548907885e-05, |
|
"loss": 0.0154, |
|
"step": 8832 |
|
}, |
|
{ |
|
"epoch": 12.79, |
|
"learning_rate": 4.085153529597974e-05, |
|
"loss": 0.0168, |
|
"step": 8878 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"learning_rate": 4.04874960430516e-05, |
|
"loss": 0.0165, |
|
"step": 8924 |
|
}, |
|
{ |
|
"epoch": 13.18, |
|
"learning_rate": 4.012345679012346e-05, |
|
"loss": 0.0143, |
|
"step": 8970 |
|
}, |
|
{ |
|
"epoch": 13.38, |
|
"learning_rate": 3.975941753719531e-05, |
|
"loss": 0.0188, |
|
"step": 9016 |
|
}, |
|
{ |
|
"epoch": 13.58, |
|
"learning_rate": 3.9395378284267176e-05, |
|
"loss": 0.0159, |
|
"step": 9062 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 3.903133903133903e-05, |
|
"loss": 0.0192, |
|
"step": 9108 |
|
}, |
|
{ |
|
"epoch": 13.97, |
|
"learning_rate": 3.866729977841089e-05, |
|
"loss": 0.018, |
|
"step": 9154 |
|
}, |
|
{ |
|
"epoch": 14.17, |
|
"learning_rate": 3.830326052548275e-05, |
|
"loss": 0.0194, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 3.793922127255461e-05, |
|
"loss": 0.0139, |
|
"step": 9246 |
|
}, |
|
{ |
|
"epoch": 12.27, |
|
"learning_rate": 3.757518201962647e-05, |
|
"loss": 0.0122, |
|
"step": 9292 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 3.7211142766698325e-05, |
|
"loss": 0.015, |
|
"step": 9338 |
|
}, |
|
{ |
|
"epoch": 12.67, |
|
"learning_rate": 3.684710351377018e-05, |
|
"loss": 0.0112, |
|
"step": 9384 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 3.6483064260842046e-05, |
|
"loss": 0.0134, |
|
"step": 9430 |
|
}, |
|
{ |
|
"epoch": 13.06, |
|
"learning_rate": 3.6119025007913896e-05, |
|
"loss": 0.0139, |
|
"step": 9476 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 3.575498575498576e-05, |
|
"loss": 0.0129, |
|
"step": 9522 |
|
}, |
|
{ |
|
"epoch": 13.45, |
|
"learning_rate": 3.539094650205762e-05, |
|
"loss": 0.0147, |
|
"step": 9568 |
|
}, |
|
{ |
|
"epoch": 13.65, |
|
"learning_rate": 3.5026907249129474e-05, |
|
"loss": 0.0133, |
|
"step": 9614 |
|
}, |
|
{ |
|
"epoch": 13.85, |
|
"learning_rate": 3.466286799620133e-05, |
|
"loss": 0.0135, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 14.04, |
|
"learning_rate": 3.429882874327319e-05, |
|
"loss": 0.015, |
|
"step": 9706 |
|
}, |
|
{ |
|
"epoch": 14.24, |
|
"learning_rate": 3.393478949034505e-05, |
|
"loss": 0.0128, |
|
"step": 9752 |
|
}, |
|
{ |
|
"epoch": 14.44, |
|
"learning_rate": 3.35707502374169e-05, |
|
"loss": 0.0135, |
|
"step": 9798 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 3.3206710984488765e-05, |
|
"loss": 0.0137, |
|
"step": 9844 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 3.284267173156062e-05, |
|
"loss": 0.013, |
|
"step": 9890 |
|
}, |
|
{ |
|
"epoch": 15.03, |
|
"learning_rate": 3.247863247863248e-05, |
|
"loss": 0.0157, |
|
"step": 9936 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 3.2114593225704336e-05, |
|
"loss": 0.0139, |
|
"step": 9982 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 3.17505539727762e-05, |
|
"loss": 0.0103, |
|
"step": 10028 |
|
}, |
|
{ |
|
"epoch": 13.33, |
|
"learning_rate": 3.138651471984806e-05, |
|
"loss": 0.0095, |
|
"step": 10074 |
|
}, |
|
{ |
|
"epoch": 13.53, |
|
"learning_rate": 3.1022475466919914e-05, |
|
"loss": 0.0114, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 13.72, |
|
"learning_rate": 3.065843621399177e-05, |
|
"loss": 0.0114, |
|
"step": 10166 |
|
}, |
|
{ |
|
"epoch": 13.92, |
|
"learning_rate": 3.029439696106363e-05, |
|
"loss": 0.0097, |
|
"step": 10212 |
|
}, |
|
{ |
|
"epoch": 14.12, |
|
"learning_rate": 2.9930357708135488e-05, |
|
"loss": 0.0094, |
|
"step": 10258 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 2.956631845520735e-05, |
|
"loss": 0.0109, |
|
"step": 10304 |
|
}, |
|
{ |
|
"epoch": 14.51, |
|
"learning_rate": 2.9202279202279202e-05, |
|
"loss": 0.0105, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 14.71, |
|
"learning_rate": 2.883823994935106e-05, |
|
"loss": 0.0103, |
|
"step": 10396 |
|
}, |
|
{ |
|
"epoch": 14.9, |
|
"learning_rate": 2.847420069642292e-05, |
|
"loss": 0.0111, |
|
"step": 10442 |
|
}, |
|
{ |
|
"epoch": 15.1, |
|
"learning_rate": 2.8110161443494776e-05, |
|
"loss": 0.012, |
|
"step": 10488 |
|
}, |
|
{ |
|
"epoch": 15.29, |
|
"learning_rate": 2.7746122190566637e-05, |
|
"loss": 0.0104, |
|
"step": 10534 |
|
}, |
|
{ |
|
"epoch": 15.49, |
|
"learning_rate": 2.7382082937638494e-05, |
|
"loss": 0.0105, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 15.69, |
|
"learning_rate": 2.7018043684710354e-05, |
|
"loss": 0.0096, |
|
"step": 10626 |
|
}, |
|
{ |
|
"epoch": 15.88, |
|
"learning_rate": 2.6654004431782208e-05, |
|
"loss": 0.0126, |
|
"step": 10672 |
|
}, |
|
{ |
|
"epoch": 16.08, |
|
"learning_rate": 2.628996517885407e-05, |
|
"loss": 0.012, |
|
"step": 10718 |
|
}, |
|
{ |
|
"epoch": 16.28, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.0091, |
|
"step": 10764 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 2.5561886672997785e-05, |
|
"loss": 0.0086, |
|
"step": 10810 |
|
}, |
|
{ |
|
"epoch": 14.38, |
|
"learning_rate": 2.5197847420069642e-05, |
|
"loss": 0.0095, |
|
"step": 10856 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 2.48338081671415e-05, |
|
"loss": 0.0074, |
|
"step": 10902 |
|
}, |
|
{ |
|
"epoch": 14.78, |
|
"learning_rate": 2.446976891421336e-05, |
|
"loss": 0.0085, |
|
"step": 10948 |
|
}, |
|
{ |
|
"epoch": 14.97, |
|
"learning_rate": 2.4105729661285217e-05, |
|
"loss": 0.0092, |
|
"step": 10994 |
|
}, |
|
{ |
|
"epoch": 15.17, |
|
"learning_rate": 2.3741690408357077e-05, |
|
"loss": 0.0078, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 2.3377651155428934e-05, |
|
"loss": 0.0089, |
|
"step": 11086 |
|
}, |
|
{ |
|
"epoch": 15.56, |
|
"learning_rate": 2.301361190250079e-05, |
|
"loss": 0.0083, |
|
"step": 11132 |
|
}, |
|
{ |
|
"epoch": 15.76, |
|
"learning_rate": 2.264957264957265e-05, |
|
"loss": 0.0079, |
|
"step": 11178 |
|
}, |
|
{ |
|
"epoch": 15.96, |
|
"learning_rate": 2.2285533396644508e-05, |
|
"loss": 0.0091, |
|
"step": 11224 |
|
}, |
|
{ |
|
"epoch": 16.15, |
|
"learning_rate": 2.192149414371637e-05, |
|
"loss": 0.0082, |
|
"step": 11270 |
|
}, |
|
{ |
|
"epoch": 16.35, |
|
"learning_rate": 2.1557454890788225e-05, |
|
"loss": 0.0084, |
|
"step": 11316 |
|
}, |
|
{ |
|
"epoch": 16.55, |
|
"learning_rate": 2.1193415637860082e-05, |
|
"loss": 0.0087, |
|
"step": 11362 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 2.0829376384931943e-05, |
|
"loss": 0.0088, |
|
"step": 11408 |
|
}, |
|
{ |
|
"epoch": 16.94, |
|
"learning_rate": 2.04653371320038e-05, |
|
"loss": 0.0098, |
|
"step": 11454 |
|
}, |
|
{ |
|
"epoch": 17.14, |
|
"learning_rate": 2.010129787907566e-05, |
|
"loss": 0.0087, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 15.05, |
|
"learning_rate": 1.9737258626147517e-05, |
|
"loss": 0.0087, |
|
"step": 11546 |
|
}, |
|
{ |
|
"epoch": 15.24, |
|
"learning_rate": 1.9373219373219374e-05, |
|
"loss": 0.0066, |
|
"step": 11592 |
|
}, |
|
{ |
|
"epoch": 15.44, |
|
"learning_rate": 1.900918012029123e-05, |
|
"loss": 0.0071, |
|
"step": 11638 |
|
}, |
|
{ |
|
"epoch": 15.64, |
|
"learning_rate": 1.8645140867363088e-05, |
|
"loss": 0.0079, |
|
"step": 11684 |
|
}, |
|
{ |
|
"epoch": 15.83, |
|
"learning_rate": 1.828110161443495e-05, |
|
"loss": 0.0074, |
|
"step": 11730 |
|
}, |
|
{ |
|
"epoch": 16.03, |
|
"learning_rate": 1.7917062361506805e-05, |
|
"loss": 0.0069, |
|
"step": 11776 |
|
}, |
|
{ |
|
"epoch": 16.23, |
|
"learning_rate": 1.7553023108578666e-05, |
|
"loss": 0.0063, |
|
"step": 11822 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 1.7188983855650523e-05, |
|
"loss": 0.0075, |
|
"step": 11868 |
|
}, |
|
{ |
|
"epoch": 16.62, |
|
"learning_rate": 1.682494460272238e-05, |
|
"loss": 0.0074, |
|
"step": 11914 |
|
}, |
|
{ |
|
"epoch": 16.82, |
|
"learning_rate": 1.646090534979424e-05, |
|
"loss": 0.0067, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 1.6096866096866097e-05, |
|
"loss": 0.0064, |
|
"step": 12006 |
|
}, |
|
{ |
|
"epoch": 17.21, |
|
"learning_rate": 1.5732826843937957e-05, |
|
"loss": 0.0068, |
|
"step": 12052 |
|
}, |
|
{ |
|
"epoch": 17.41, |
|
"learning_rate": 1.5368787591009814e-05, |
|
"loss": 0.0076, |
|
"step": 12098 |
|
}, |
|
{ |
|
"epoch": 17.6, |
|
"learning_rate": 1.5004748338081673e-05, |
|
"loss": 0.0069, |
|
"step": 12144 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 1.4640709085153532e-05, |
|
"loss": 0.0075, |
|
"step": 12190 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 1.427666983222539e-05, |
|
"loss": 0.0072, |
|
"step": 12236 |
|
}, |
|
{ |
|
"epoch": 18.19, |
|
"learning_rate": 1.3912630579297247e-05, |
|
"loss": 0.0066, |
|
"step": 12282 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 1.3548591326369106e-05, |
|
"loss": 0.0069, |
|
"step": 12328 |
|
}, |
|
{ |
|
"epoch": 16.3, |
|
"learning_rate": 1.3184552073440961e-05, |
|
"loss": 0.0052, |
|
"step": 12374 |
|
}, |
|
{ |
|
"epoch": 16.5, |
|
"learning_rate": 1.282051282051282e-05, |
|
"loss": 0.0064, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 16.69, |
|
"learning_rate": 1.245647356758468e-05, |
|
"loss": 0.0063, |
|
"step": 12466 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 1.2092434314656539e-05, |
|
"loss": 0.006, |
|
"step": 12512 |
|
}, |
|
{ |
|
"epoch": 17.09, |
|
"learning_rate": 1.1728395061728396e-05, |
|
"loss": 0.0057, |
|
"step": 12558 |
|
}, |
|
{ |
|
"epoch": 17.28, |
|
"learning_rate": 1.1364355808800253e-05, |
|
"loss": 0.0061, |
|
"step": 12604 |
|
}, |
|
{ |
|
"epoch": 17.48, |
|
"learning_rate": 1.1000316555872111e-05, |
|
"loss": 0.0055, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 17.68, |
|
"learning_rate": 1.063627730294397e-05, |
|
"loss": 0.0053, |
|
"step": 12696 |
|
}, |
|
{ |
|
"epoch": 17.87, |
|
"learning_rate": 1.0272238050015829e-05, |
|
"loss": 0.0051, |
|
"step": 12742 |
|
}, |
|
{ |
|
"epoch": 18.07, |
|
"learning_rate": 9.908198797087687e-06, |
|
"loss": 0.0064, |
|
"step": 12788 |
|
}, |
|
{ |
|
"epoch": 18.26, |
|
"learning_rate": 9.544159544159544e-06, |
|
"loss": 0.0056, |
|
"step": 12834 |
|
}, |
|
{ |
|
"epoch": 18.46, |
|
"learning_rate": 9.180120291231403e-06, |
|
"loss": 0.0062, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 18.66, |
|
"learning_rate": 8.816081038303262e-06, |
|
"loss": 0.0065, |
|
"step": 12926 |
|
}, |
|
{ |
|
"epoch": 18.85, |
|
"learning_rate": 8.452041785375119e-06, |
|
"loss": 0.0065, |
|
"step": 12972 |
|
}, |
|
{ |
|
"epoch": 19.05, |
|
"learning_rate": 8.088002532446977e-06, |
|
"loss": 0.0064, |
|
"step": 13018 |
|
}, |
|
{ |
|
"epoch": 19.25, |
|
"learning_rate": 7.723963279518836e-06, |
|
"loss": 0.0062, |
|
"step": 13064 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 7.359924026590694e-06, |
|
"loss": 0.0055, |
|
"step": 13110 |
|
}, |
|
{ |
|
"epoch": 17.35, |
|
"learning_rate": 6.995884773662552e-06, |
|
"loss": 0.0046, |
|
"step": 13156 |
|
}, |
|
{ |
|
"epoch": 17.55, |
|
"learning_rate": 6.63184552073441e-06, |
|
"loss": 0.0049, |
|
"step": 13202 |
|
}, |
|
{ |
|
"epoch": 17.75, |
|
"learning_rate": 6.267806267806268e-06, |
|
"loss": 0.0044, |
|
"step": 13248 |
|
}, |
|
{ |
|
"epoch": 17.94, |
|
"learning_rate": 5.903767014878126e-06, |
|
"loss": 0.0057, |
|
"step": 13294 |
|
}, |
|
{ |
|
"epoch": 18.14, |
|
"learning_rate": 5.539727761949985e-06, |
|
"loss": 0.0047, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 18.34, |
|
"learning_rate": 5.175688509021842e-06, |
|
"loss": 0.0048, |
|
"step": 13386 |
|
}, |
|
{ |
|
"epoch": 18.53, |
|
"learning_rate": 4.8116492560937e-06, |
|
"loss": 0.0051, |
|
"step": 13432 |
|
}, |
|
{ |
|
"epoch": 18.73, |
|
"learning_rate": 4.447610003165559e-06, |
|
"loss": 0.0056, |
|
"step": 13478 |
|
}, |
|
{ |
|
"epoch": 18.93, |
|
"learning_rate": 4.083570750237417e-06, |
|
"loss": 0.0051, |
|
"step": 13524 |
|
}, |
|
{ |
|
"epoch": 19.12, |
|
"learning_rate": 3.7195314973092754e-06, |
|
"loss": 0.0052, |
|
"step": 13570 |
|
}, |
|
{ |
|
"epoch": 19.32, |
|
"learning_rate": 3.3554922443811336e-06, |
|
"loss": 0.0068, |
|
"step": 13616 |
|
}, |
|
{ |
|
"epoch": 19.52, |
|
"learning_rate": 2.991452991452992e-06, |
|
"loss": 0.0064, |
|
"step": 13662 |
|
}, |
|
{ |
|
"epoch": 19.71, |
|
"learning_rate": 2.6274137385248497e-06, |
|
"loss": 0.0055, |
|
"step": 13708 |
|
}, |
|
{ |
|
"epoch": 19.91, |
|
"learning_rate": 2.263374485596708e-06, |
|
"loss": 0.0056, |
|
"step": 13754 |
|
}, |
|
{ |
|
"epoch": 20.11, |
|
"learning_rate": 1.899335232668566e-06, |
|
"loss": 0.0052, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 1.5352959797404244e-06, |
|
"loss": 0.0051, |
|
"step": 13846 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 1.1712567268122824e-06, |
|
"loss": 0.0052, |
|
"step": 13892 |
|
}, |
|
{ |
|
"epoch": 18.41, |
|
"learning_rate": 8.072174738841407e-07, |
|
"loss": 0.0053, |
|
"step": 13938 |
|
}, |
|
{ |
|
"epoch": 18.61, |
|
"learning_rate": 4.4317822095599874e-07, |
|
"loss": 0.0043, |
|
"step": 13984 |
|
}, |
|
{ |
|
"epoch": 18.8, |
|
"learning_rate": 7.913896802785692e-08, |
|
"loss": 0.0046, |
|
"step": 14030 |
|
} |
|
], |
|
"logging_steps": 46, |
|
"max_steps": 14040, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"total_flos": 6.139659927158784e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|