{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.99726094101893,
  "global_step": 512,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 1.3124999999999999e-05,
      "loss": 3.3565,
      "step": 3
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.6249999999999998e-05,
      "loss": 3.1293,
      "step": 6
    },
    {
      "epoch": 0.04,
      "learning_rate": 3.9374999999999995e-05,
      "loss": 2.987,
      "step": 9
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.2499999999999995e-05,
      "loss": 2.9036,
      "step": 12
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.5625e-05,
      "loss": 2.7414,
      "step": 15
    },
    {
      "epoch": 0.07,
      "learning_rate": 6.99971917961865e-05,
      "loss": 2.7513,
      "step": 18
    },
    {
      "epoch": 0.08,
      "learning_rate": 6.998244995833964e-05,
      "loss": 2.6567,
      "step": 21
    },
    {
      "epoch": 0.09,
      "learning_rate": 6.995507775098683e-05,
      "loss": 2.6567,
      "step": 24
    },
    {
      "epoch": 0.11,
      "learning_rate": 6.991508505682909e-05,
      "loss": 2.651,
      "step": 27
    },
    {
      "epoch": 0.12,
      "learning_rate": 6.986248631517822e-05,
      "loss": 2.5459,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 6.979730051674372e-05,
      "loss": 2.5653,
      "step": 33
    },
    {
      "epoch": 0.14,
      "learning_rate": 6.9719551196776e-05,
      "loss": 2.5159,
      "step": 36
    },
    {
      "epoch": 0.15,
      "learning_rate": 6.962926642656914e-05,
      "loss": 2.4596,
      "step": 39
    },
    {
      "epoch": 0.16,
      "learning_rate": 6.952647880332572e-05,
      "loss": 2.5057,
      "step": 42
    },
    {
      "epoch": 0.18,
      "learning_rate": 6.941122543838767e-05,
      "loss": 2.4062,
      "step": 45
    },
    {
      "epoch": 0.19,
      "learning_rate": 6.92835479438373e-05,
      "loss": 2.44,
      "step": 48
    },
    {
      "epoch": 0.2,
      "learning_rate": 6.914349241747322e-05,
      "loss": 2.3659,
      "step": 51
    },
    {
      "epoch": 0.21,
      "learning_rate": 6.899110942616686e-05,
      "loss": 2.3672,
      "step": 54
    },
    {
      "epoch": 0.22,
      "learning_rate": 6.882645398760536e-05,
      "loss": 2.3759,
      "step": 57
    },
    {
      "epoch": 0.23,
      "learning_rate": 6.864958555042743e-05,
      "loss": 2.3567,
      "step": 60
    },
    {
      "epoch": 0.25,
      "learning_rate": 6.846056797275964e-05,
      "loss": 2.3606,
      "step": 63
    },
    {
      "epoch": 0.26,
      "learning_rate": 6.825946949916035e-05,
      "loss": 2.3166,
      "step": 66
    },
    {
      "epoch": 0.27,
      "learning_rate": 6.804636273598024e-05,
      "loss": 2.2707,
      "step": 69
    },
    {
      "epoch": 0.28,
      "learning_rate": 6.782132462514781e-05,
      "loss": 2.2617,
      "step": 72
    },
    {
      "epoch": 0.29,
      "learning_rate": 6.758443641638958e-05,
      "loss": 2.2521,
      "step": 75
    },
    {
      "epoch": 0.3,
      "learning_rate": 6.733578363789503e-05,
      "loss": 2.2746,
      "step": 78
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.707545606543672e-05,
      "loss": 2.2112,
      "step": 81
    },
    {
      "epoch": 0.33,
      "learning_rate": 6.680354768995687e-05,
      "loss": 2.2237,
      "step": 84
    },
    {
      "epoch": 0.34,
      "learning_rate": 6.652015668363205e-05,
      "loss": 2.2225,
      "step": 87
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.622538536442822e-05,
      "loss": 2.1471,
      "step": 90
    },
    {
      "epoch": 0.36,
      "learning_rate": 6.5919340159159e-05,
      "loss": 2.0738,
      "step": 93
    },
    {
      "epoch": 0.37,
      "learning_rate": 6.560213156506037e-05,
      "loss": 2.1797,
      "step": 96
    },
    {
      "epoch": 0.39,
      "learning_rate": 6.527387410989579e-05,
      "loss": 2.2545,
      "step": 99
    },
    {
      "epoch": 0.4,
      "learning_rate": 6.493468631060607e-05,
      "loss": 2.214,
      "step": 102
    },
    {
      "epoch": 0.41,
      "learning_rate": 6.458469063051903e-05,
      "loss": 2.1769,
      "step": 105
    },
    {
      "epoch": 0.42,
      "learning_rate": 6.422401343513426e-05,
      "loss": 2.1163,
      "step": 108
    },
    {
      "epoch": 0.43,
      "learning_rate": 6.385278494649894e-05,
      "loss": 2.1318,
      "step": 111
    },
    {
      "epoch": 0.44,
      "learning_rate": 6.347113919619143e-05,
      "loss": 2.1464,
      "step": 114
    },
    {
      "epoch": 0.46,
      "learning_rate": 6.307921397692931e-05,
      "loss": 2.1739,
      "step": 117
    },
    {
      "epoch": 0.47,
      "learning_rate": 6.267715079281944e-05,
      "loss": 2.1177,
      "step": 120
    },
    {
      "epoch": 0.48,
      "learning_rate": 6.226509480826817e-05,
      "loss": 2.1126,
      "step": 123
    },
    {
      "epoch": 0.49,
      "learning_rate": 6.184319479556984e-05,
      "loss": 2.1321,
      "step": 126
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.141160308119273e-05,
      "loss": 2.0559,
      "step": 129
    },
    {
      "epoch": 0.51,
      "learning_rate": 6.0970475490781874e-05,
      "loss": 2.1131,
      "step": 132
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.0519971292898285e-05,
      "loss": 2.1012,
      "step": 135
    },
    {
      "epoch": 0.54,
      "learning_rate": 6.0060253141515295e-05,
      "loss": 2.076,
      "step": 138
    },
    {
      "epoch": 0.55,
      "learning_rate": 5.95914870172926e-05,
      "loss": 2.0437,
      "step": 141
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.911384216764903e-05,
      "loss": 2.0269,
      "step": 144
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.862749104565608e-05,
      "loss": 2.0285,
      "step": 147
    },
    {
      "epoch": 0.58,
      "learning_rate": 5.8132609247773926e-05,
      "loss": 2.0499,
      "step": 150
    },
    {
      "epoch": 0.6,
      "learning_rate": 5.762937545045251e-05,
      "loss": 2.0388,
      "step": 153
    },
    {
      "epoch": 0.61,
      "learning_rate": 5.711797134562063e-05,
      "loss": 1.9678,
      "step": 156
    },
    {
      "epoch": 0.62,
      "learning_rate": 5.6598581575086404e-05,
      "loss": 2.0444,
      "step": 159
    },
    {
      "epoch": 0.63,
      "learning_rate": 5.60713936638724e-05,
      "loss": 2.0038,
      "step": 162
    },
    {
      "epoch": 0.64,
      "learning_rate": 5.553659795251013e-05,
      "loss": 1.9812,
      "step": 165
    },
    {
      "epoch": 0.65,
      "learning_rate": 5.499438752831773e-05,
      "loss": 1.9962,
      "step": 168
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.444495815568607e-05,
      "loss": 1.9857,
      "step": 171
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.38885082053983e-05,
      "loss": 1.9288,
      "step": 174
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.332523858300823e-05,
      "loss": 1.9271,
      "step": 177
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.2755352656303755e-05,
      "loss": 1.9128,
      "step": 180
    },
    {
      "epoch": 0.71,
      "learning_rate": 5.217905618188108e-05,
      "loss": 2.0431,
      "step": 183
    },
    {
      "epoch": 0.72,
      "learning_rate": 5.1596557230856576e-05,
      "loss": 1.8937,
      "step": 186
    },
    {
      "epoch": 0.74,
      "learning_rate": 5.1008066113743024e-05,
      "loss": 1.9296,
      "step": 189
    },
    {
      "epoch": 0.75,
      "learning_rate": 5.04137953045172e-05,
      "loss": 1.9901,
      "step": 192
    },
    {
      "epoch": 0.76,
      "learning_rate": 4.981395936390644e-05,
      "loss": 1.9698,
      "step": 195
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.920877486192174e-05,
      "loss": 1.9076,
      "step": 198
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.85984602996655e-05,
      "loss": 1.8708,
      "step": 201
    },
    {
      "epoch": 0.79,
      "learning_rate": 4.798323603044187e-05,
      "loss": 1.9115,
      "step": 204
    },
    {
      "epoch": 0.81,
      "learning_rate": 4.736332418019853e-05,
      "loss": 1.8812,
      "step": 207
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.67389485673284e-05,
      "loss": 1.9152,
      "step": 210
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.6110334621860254e-05,
      "loss": 2.0206,
      "step": 213
    },
    {
      "epoch": 0.84,
      "learning_rate": 4.547770930406753e-05,
      "loss": 1.9291,
      "step": 216
    },
    {
      "epoch": 0.85,
      "learning_rate": 4.4841301022524574e-05,
      "loss": 1.9792,
      "step": 219
    },
    {
      "epoch": 0.86,
      "learning_rate": 4.420133955164006e-05,
      "loss": 1.9464,
      "step": 222
    },
    {
      "epoch": 0.88,
      "learning_rate": 4.3558055948697185e-05,
      "loss": 1.835,
      "step": 225
    },
    {
      "epoch": 0.89,
      "learning_rate": 4.291168247043072e-05,
      "loss": 1.771,
      "step": 228
    },
    {
      "epoch": 0.9,
      "learning_rate": 4.2262452489171054e-05,
      "loss": 1.9584,
      "step": 231
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.1610600408585395e-05,
      "loss": 2.0061,
      "step": 234
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.095636157904658e-05,
      "loss": 1.8876,
      "step": 237
    },
    {
      "epoch": 0.93,
      "learning_rate": 4.029997221266018e-05,
      "loss": 1.8706,
      "step": 240
    },
    {
      "epoch": 0.95,
      "learning_rate": 3.964166929798036e-05,
      "loss": 1.884,
      "step": 243
    },
    {
      "epoch": 0.96,
      "learning_rate": 3.898169051444552e-05,
      "loss": 1.8348,
      "step": 246
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.8320274146564356e-05,
      "loss": 1.8026,
      "step": 249
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.7657658997883615e-05,
      "loss": 1.847,
      "step": 252
    },
    {
      "epoch": 0.99,
      "learning_rate": 3.699408430476834e-05,
      "loss": 1.8465,
      "step": 255
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.8656249046325684,
      "eval_runtime": 309.3485,
      "eval_samples_per_second": 13.629,
      "eval_steps_per_second": 13.629,
      "step": 256
    },
    {
      "epoch": 1.01,
      "learning_rate": 3.632978965002587e-05,
      "loss": 2.1828,
      "step": 258
    },
    {
      "epoch": 1.02,
      "learning_rate": 3.566501487640479e-05,
      "loss": 1.6617,
      "step": 261
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.5e-05,
      "loss": 1.6496,
      "step": 264
    },
    {
      "epoch": 1.04,
      "learning_rate": 3.433498512359521e-05,
      "loss": 1.5581,
      "step": 267
    },
    {
      "epoch": 1.05,
      "learning_rate": 3.367021034997412e-05,
      "loss": 1.6121,
      "step": 270
    },
    {
      "epoch": 1.07,
      "learning_rate": 3.300591569523165e-05,
      "loss": 1.6664,
      "step": 273
    },
    {
      "epoch": 1.08,
      "learning_rate": 3.2342341002116385e-05,
      "loss": 1.5229,
      "step": 276
    },
    {
      "epoch": 1.09,
      "learning_rate": 3.1679725853435645e-05,
      "loss": 1.5276,
      "step": 279
    },
    {
      "epoch": 1.1,
      "learning_rate": 3.101830948555448e-05,
      "loss": 1.6506,
      "step": 282
    },
    {
      "epoch": 1.11,
      "learning_rate": 3.035833070201963e-05,
      "loss": 1.5594,
      "step": 285
    },
    {
      "epoch": 1.12,
      "learning_rate": 2.9700027787339826e-05,
      "loss": 1.4913,
      "step": 288
    },
    {
      "epoch": 1.14,
      "learning_rate": 2.904363842095341e-05,
      "loss": 1.6122,
      "step": 291
    },
    {
      "epoch": 1.15,
      "learning_rate": 2.838939959141461e-05,
      "loss": 1.6086,
      "step": 294
    },
    {
      "epoch": 1.16,
      "learning_rate": 2.7737547510828943e-05,
      "loss": 1.5963,
      "step": 297
    },
    {
      "epoch": 1.17,
      "learning_rate": 2.7088317529569277e-05,
      "loss": 1.6068,
      "step": 300
    },
    {
      "epoch": 1.18,
      "learning_rate": 2.6441944051302816e-05,
      "loss": 1.528,
      "step": 303
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.5798660448359928e-05,
      "loss": 1.629,
      "step": 306
    },
    {
      "epoch": 1.21,
      "learning_rate": 2.5158698977475426e-05,
      "loss": 1.6335,
      "step": 309
    },
    {
      "epoch": 1.22,
      "learning_rate": 2.4522290695932468e-05,
      "loss": 1.5641,
      "step": 312
    },
    {
      "epoch": 1.23,
      "learning_rate": 2.3889665378139753e-05,
      "loss": 1.5514,
      "step": 315
    },
    {
      "epoch": 1.24,
      "learning_rate": 2.32610514326716e-05,
      "loss": 1.6076,
      "step": 318
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.263667581980147e-05,
      "loss": 1.5815,
      "step": 321
    },
    {
      "epoch": 1.26,
      "learning_rate": 2.2016763969558128e-05,
      "loss": 1.5281,
      "step": 324
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.140153970033449e-05,
      "loss": 1.5254,
      "step": 327
    },
    {
      "epoch": 1.29,
      "learning_rate": 2.0791225138078253e-05,
      "loss": 1.5966,
      "step": 330
    },
    {
      "epoch": 1.3,
      "learning_rate": 2.0186040636093567e-05,
      "loss": 1.5427,
      "step": 333
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.9586204695482795e-05,
      "loss": 1.6041,
      "step": 336
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.8991933886256963e-05,
      "loss": 1.4729,
      "step": 339
    },
    {
      "epoch": 1.34,
      "learning_rate": 1.840344276914342e-05,
      "loss": 1.564,
      "step": 342
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.7820943818118924e-05,
      "loss": 1.5334,
      "step": 345
    },
    {
      "epoch": 1.36,
      "learning_rate": 1.7244647343696252e-05,
      "loss": 1.5026,
      "step": 348
    },
    {
      "epoch": 1.37,
      "learning_rate": 1.6674761416991767e-05,
      "loss": 1.586,
      "step": 351
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.611149179460171e-05,
      "loss": 1.5368,
      "step": 354
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.555504184431393e-05,
      "loss": 1.5588,
      "step": 357
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.500561247168226e-05,
      "loss": 1.5276,
      "step": 360
    },
    {
      "epoch": 1.42,
      "learning_rate": 1.446340204748987e-05,
      "loss": 1.4957,
      "step": 363
    },
    {
      "epoch": 1.43,
      "learning_rate": 1.3928606336127589e-05,
      "loss": 1.5479,
      "step": 366
    },
    {
      "epoch": 1.44,
      "learning_rate": 1.34014184249136e-05,
      "loss": 1.4862,
      "step": 369
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.2882028654379362e-05,
      "loss": 1.5849,
      "step": 372
    },
    {
      "epoch": 1.46,
      "learning_rate": 1.2370624549547507e-05,
      "loss": 1.5811,
      "step": 375
    },
    {
      "epoch": 1.48,
      "learning_rate": 1.186739075222608e-05,
      "loss": 1.4914,
      "step": 378
    },
    {
      "epoch": 1.49,
      "learning_rate": 1.1372508954343916e-05,
      "loss": 1.5442,
      "step": 381
    },
    {
      "epoch": 1.5,
      "learning_rate": 1.0886157832350968e-05,
      "loss": 1.528,
      "step": 384
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.0408512982707408e-05,
      "loss": 1.51,
      "step": 387
    },
    {
      "epoch": 1.52,
      "learning_rate": 9.939746858484699e-06,
      "loss": 1.4226,
      "step": 390
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.480028707101716e-06,
      "loss": 1.5474,
      "step": 393
    },
    {
      "epoch": 1.55,
      "learning_rate": 9.02952450921813e-06,
      "loss": 1.5436,
      "step": 396
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.588396918807265e-06,
      "loss": 1.5102,
      "step": 399
    },
    {
      "epoch": 1.57,
      "learning_rate": 8.156805204430163e-06,
      "loss": 1.4774,
      "step": 402
    },
    {
      "epoch": 1.58,
      "learning_rate": 7.734905191731818e-06,
      "loss": 1.5731,
      "step": 405
    },
    {
      "epoch": 1.59,
      "learning_rate": 7.322849207180554e-06,
      "loss": 1.4854,
      "step": 408
    },
    {
      "epoch": 1.6,
      "learning_rate": 6.92078602307069e-06,
      "loss": 1.5024,
      "step": 411
    },
    {
      "epoch": 1.62,
      "learning_rate": 6.528860803808572e-06,
      "loss": 1.4762,
      "step": 414
    },
    {
      "epoch": 1.63,
      "learning_rate": 6.147215053501067e-06,
      "loss": 1.5015,
      "step": 417
    },
    {
      "epoch": 1.64,
      "learning_rate": 5.775986564865746e-06,
      "loss": 1.4655,
      "step": 420
    },
    {
      "epoch": 1.65,
      "learning_rate": 5.4153093694809615e-06,
      "loss": 1.3807,
      "step": 423
    },
    {
      "epoch": 1.66,
      "learning_rate": 5.065313689393926e-06,
      "loss": 1.5052,
      "step": 426
    },
    {
      "epoch": 1.67,
      "learning_rate": 4.7261258901042164e-06,
      "loss": 1.5502,
      "step": 429
    },
    {
      "epoch": 1.69,
      "learning_rate": 4.397868434939627e-06,
      "loss": 1.4234,
      "step": 432
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.080659840841001e-06,
      "loss": 1.4875,
      "step": 435
    },
    {
      "epoch": 1.71,
      "learning_rate": 3.774614635571785e-06,
      "loss": 1.4858,
      "step": 438
    },
    {
      "epoch": 1.72,
      "learning_rate": 3.4798433163679534e-06,
      "loss": 1.4217,
      "step": 441
    },
    {
      "epoch": 1.73,
      "learning_rate": 3.1964523100431278e-06,
      "loss": 1.4572,
      "step": 444
    },
    {
      "epoch": 1.74,
      "learning_rate": 2.924543934563277e-06,
      "loss": 1.5039,
      "step": 447
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.664216362104964e-06,
      "loss": 1.4897,
      "step": 450
    },
    {
      "epoch": 1.77,
      "learning_rate": 2.4155635836104113e-06,
      "loss": 1.5384,
      "step": 453
    },
    {
      "epoch": 1.78,
      "learning_rate": 2.178675374852189e-06,
      "loss": 1.505,
      "step": 456
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.953637264019748e-06,
      "loss": 1.5019,
      "step": 459
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.7405305008396436e-06,
      "loss": 1.5941,
      "step": 462
    },
    {
      "epoch": 1.81,
      "learning_rate": 1.5394320272403605e-06,
      "loss": 1.5265,
      "step": 465
    },
    {
      "epoch": 1.83,
      "learning_rate": 1.3504144495725661e-06,
      "loss": 1.5035,
      "step": 468
    },
    {
      "epoch": 1.84,
      "learning_rate": 1.1735460123946455e-06,
      "loss": 1.5002,
      "step": 471
    },
    {
      "epoch": 1.85,
      "learning_rate": 1.0088905738331372e-06,
      "loss": 1.4317,
      "step": 474
    },
    {
      "epoch": 1.86,
      "learning_rate": 8.565075825267759e-07,
      "loss": 1.4852,
      "step": 477
    },
    {
      "epoch": 1.87,
      "learning_rate": 7.16452056162693e-07,
      "loss": 1.5445,
      "step": 480
    },
    {
      "epoch": 1.88,
      "learning_rate": 5.887745616123169e-07,
      "loss": 1.4855,
      "step": 483
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.735211966742819e-07,
      "loss": 1.4852,
      "step": 486
    },
    {
      "epoch": 1.91,
      "learning_rate": 3.7073357343086086e-07,
      "loss": 1.4545,
      "step": 489
    },
    {
      "epoch": 1.92,
      "learning_rate": 2.804488032239943e-07,
      "loss": 1.445,
      "step": 492
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.0269948325627227e-07,
      "loss": 1.4484,
      "step": 495
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.3751368482176727e-07,
      "loss": 1.4113,
      "step": 498
    },
    {
      "epoch": 1.95,
      "learning_rate": 8.491494317091229e-08,
      "loss": 1.5394,
      "step": 501
    },
    {
      "epoch": 1.97,
      "learning_rate": 4.492224901315344e-08,
      "loss": 1.4475,
      "step": 504
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.7550041660350232e-08,
      "loss": 1.4934,
      "step": 507
    },
    {
      "epoch": 1.99,
      "learning_rate": 2.808203813499665e-09,
      "loss": 1.4903,
      "step": 510
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.7396166324615479,
      "eval_runtime": 309.3927,
      "eval_samples_per_second": 13.627,
      "eval_steps_per_second": 13.627,
      "step": 512
    },
    {
      "epoch": 2.0,
      "step": 512,
      "total_flos": 2.7416092993388544e+17,
      "train_loss": 1.8553011305630207,
      "train_runtime": 15007.5766,
      "train_samples_per_second": 4.379,
      "train_steps_per_second": 0.034
    }
  ],
  "max_steps": 512,
  "num_train_epochs": 2,
  "total_flos": 2.7416092993388544e+17,
  "trial_name": null,
  "trial_params": null
}