|
{ |
|
"best_metric": 1.0933133363723755, |
|
"best_model_checkpoint": "output/coldplay/checkpoint-550", |
|
"epoch": 11.0, |
|
"global_step": 550, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00013355940688336427, |
|
"loss": 3.2771, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00012302403914397873, |
|
"loss": 2.8643, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00010671211798514472, |
|
"loss": 2.6223, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 8.635498649403293e-05, |
|
"loss": 2.9051, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 6.411334533481218e-05, |
|
"loss": 2.8302, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.2347916539754844e-05, |
|
"loss": 2.8412, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.3368877084135277e-05, |
|
"loss": 2.7828, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.190657300387505e-06, |
|
"loss": 2.6878, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3181297643383925e-06, |
|
"loss": 2.57, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.627840757369995, |
|
"eval_runtime": 3.4104, |
|
"eval_samples_per_second": 21.405, |
|
"eval_steps_per_second": 2.932, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 1.3746270344901413e-06, |
|
"loss": 2.7251, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 9.576451662754438e-06, |
|
"loss": 2.4207, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 2.4309929383066146e-05, |
|
"loss": 2.7058, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.3944626783346644e-05, |
|
"loss": 2.6908, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 6.630773257727353e-05, |
|
"loss": 2.7345, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.892450484875447e-05, |
|
"loss": 2.4105, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00010929213048843373, |
|
"loss": 2.5926, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00012515669103944476, |
|
"loss": 2.4909, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00013476258540873022, |
|
"loss": 2.5389, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.446333646774292, |
|
"eval_runtime": 1.0636, |
|
"eval_samples_per_second": 76.156, |
|
"eval_steps_per_second": 10.342, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00013704680787354832, |
|
"loss": 2.3598, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00013175658222600302, |
|
"loss": 2.2124, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00011947733444744994, |
|
"loss": 2.1658, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001015679084058065, |
|
"loss": 2.1519, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 8.001019372440279e-05, |
|
"loss": 2.243, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 5.718980627559731e-05, |
|
"loss": 2.2952, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 3.563209159419354e-05, |
|
"loss": 2.2394, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.772266555255008e-05, |
|
"loss": 2.2106, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 5.443417773996978e-06, |
|
"loss": 2.2222, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.5319212645169297e-07, |
|
"loss": 2.1897, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.3795111179351807, |
|
"eval_runtime": 1.0634, |
|
"eval_samples_per_second": 76.169, |
|
"eval_steps_per_second": 10.344, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.4374145912697595e-06, |
|
"loss": 2.0277, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.204330896055522e-05, |
|
"loss": 2.0337, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 2.790786951156628e-05, |
|
"loss": 2.0818, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 4.8275495151245426e-05, |
|
"loss": 2.1041, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 7.089226742272638e-05, |
|
"loss": 2.1708, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 9.325537321665337e-05, |
|
"loss": 2.0552, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 0.00011289007061693382, |
|
"loss": 2.1205, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00012762354833724553, |
|
"loss": 1.9704, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 0.00013582537296550986, |
|
"loss": 1.9882, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 2.3495991230010986, |
|
"eval_runtime": 1.0619, |
|
"eval_samples_per_second": 76.281, |
|
"eval_steps_per_second": 10.359, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 0.0001365879156874179, |
|
"loss": 2.1933, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 0.00012982679213998792, |
|
"loss": 1.8172, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 4.26, |
|
"learning_rate": 0.00011629020053848047, |
|
"loss": 1.6238, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 9.747612470258382e-05, |
|
"loss": 1.8408, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 7.546656444541333e-05, |
|
"loss": 1.8867, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 5.2697137691647635e-05, |
|
"loss": 2.0636, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 3.1687550572992616e-05, |
|
"loss": 1.8887, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 4.79, |
|
"learning_rate": 1.4762762169883855e-05, |
|
"loss": 1.9152, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 3.795700315696817e-06, |
|
"loss": 1.8236, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.6923, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 2.332808017730713, |
|
"eval_runtime": 1.0629, |
|
"eval_samples_per_second": 76.208, |
|
"eval_steps_per_second": 10.349, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 3.7957003156967485e-06, |
|
"loss": 1.6147, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.21, |
|
"learning_rate": 1.4762762169883802e-05, |
|
"loss": 1.7048, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 3.168755057299255e-05, |
|
"loss": 1.5544, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 5.269713769164743e-05, |
|
"loss": 1.5129, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 7.546656444541325e-05, |
|
"loss": 1.7164, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 9.747612470258363e-05, |
|
"loss": 1.6792, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 0.0001162902005384805, |
|
"loss": 1.7173, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 0.00012982679213998787, |
|
"loss": 1.7795, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"learning_rate": 0.00013658791568741792, |
|
"loss": 1.9717, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.336082935333252, |
|
"eval_runtime": 1.0719, |
|
"eval_samples_per_second": 75.567, |
|
"eval_steps_per_second": 10.262, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 0.00013582537296550986, |
|
"loss": 1.6967, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 0.00012762354833724559, |
|
"loss": 1.4993, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 0.0001128900706169339, |
|
"loss": 1.3665, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 9.325537321665346e-05, |
|
"loss": 1.5098, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 7.089226742272658e-05, |
|
"loss": 1.6949, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 4.827549515124539e-05, |
|
"loss": 1.5465, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 2.7907869511566348e-05, |
|
"loss": 1.506, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.2043308960555334e-05, |
|
"loss": 1.6257, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 2.437414591269752e-06, |
|
"loss": 1.3859, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 2.334944009780884, |
|
"eval_runtime": 1.0709, |
|
"eval_samples_per_second": 75.638, |
|
"eval_steps_per_second": 10.272, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 1.5319212645167772e-07, |
|
"loss": 1.5912, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 5.443417773996994e-06, |
|
"loss": 1.4726, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 1.772266555255011e-05, |
|
"loss": 1.2431, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 3.563209159419346e-05, |
|
"loss": 1.3668, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 7.45, |
|
"learning_rate": 5.718980627559723e-05, |
|
"loss": 1.3476, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 8.001019372440265e-05, |
|
"loss": 1.2561, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 7.66, |
|
"learning_rate": 0.00010156790840580641, |
|
"loss": 1.6616, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 7.77, |
|
"learning_rate": 0.0001194773344474498, |
|
"loss": 1.4607, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 7.87, |
|
"learning_rate": 0.00013175658222600294, |
|
"loss": 1.3461, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"learning_rate": 0.00013704680787354832, |
|
"loss": 1.3842, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 2.3194692134857178, |
|
"eval_runtime": 1.0661, |
|
"eval_samples_per_second": 75.98, |
|
"eval_steps_per_second": 10.318, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 8.979856581412133e-05, |
|
"loss": 1.3418, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 0.0001089220683072637, |
|
"loss": 1.3258, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 0.00012409856581412136, |
|
"loss": 1.67, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 0.00013384247701784757, |
|
"loss": 1.6416, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 1.5041, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.550144910812378, |
|
"eval_runtime": 2.6997, |
|
"eval_samples_per_second": 22.225, |
|
"eval_steps_per_second": 2.963, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.1, |
|
"learning_rate": 0.0001338424770178476, |
|
"loss": 1.3775, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 0.00012409856581412158, |
|
"loss": 1.3052, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 8.3, |
|
"learning_rate": 0.00010892206830726361, |
|
"loss": 1.4469, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 8.4, |
|
"learning_rate": 8.979856581412145e-05, |
|
"loss": 1.5571, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 6.859999999999995e-05, |
|
"loss": 1.3572, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 4.740143418587868e-05, |
|
"loss": 1.5739, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 2.8277931692736505e-05, |
|
"loss": 1.4984, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 8.8, |
|
"learning_rate": 1.3101434185878506e-05, |
|
"loss": 1.4842, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 3.3575229821524526e-06, |
|
"loss": 1.3847, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.3742, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.5336766242980957, |
|
"eval_runtime": 2.6931, |
|
"eval_samples_per_second": 22.279, |
|
"eval_steps_per_second": 2.971, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 6.411334533481208e-05, |
|
"loss": 1.183, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 8.635498649403306e-05, |
|
"loss": 1.514, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"learning_rate": 0.00010671211798514463, |
|
"loss": 1.3301, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 0.0001230240391439787, |
|
"loss": 1.5153, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 0.0001335594068833643, |
|
"loss": 1.3648, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0001372, |
|
"loss": 1.3593, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.2220196723937988, |
|
"eval_runtime": 3.1642, |
|
"eval_samples_per_second": 23.386, |
|
"eval_steps_per_second": 3.16, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 0.00013355940688336435, |
|
"loss": 1.2865, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 10.21, |
|
"learning_rate": 0.00012302403914397878, |
|
"loss": 1.315, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 0.00010671211798514499, |
|
"loss": 1.237, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 10.42, |
|
"learning_rate": 8.635498649403298e-05, |
|
"loss": 1.1549, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 6.4113345334812e-05, |
|
"loss": 1.3219, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 10.62, |
|
"learning_rate": 4.23479165397549e-05, |
|
"loss": 1.2553, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 2.3368877084135498e-05, |
|
"loss": 1.1603, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 10.83, |
|
"learning_rate": 9.190657300387535e-06, |
|
"loss": 1.2085, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 1.3181297643384459e-06, |
|
"loss": 1.2453, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.2095028162002563, |
|
"eval_runtime": 3.297, |
|
"eval_samples_per_second": 22.445, |
|
"eval_steps_per_second": 3.033, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 4.7401434185878923e-05, |
|
"loss": 1.0609, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 2.8277931692736518e-05, |
|
"loss": 1.2072, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 10.8, |
|
"learning_rate": 1.3101434185878659e-05, |
|
"loss": 1.2221, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 10.9, |
|
"learning_rate": 3.3575229821524526e-06, |
|
"loss": 1.0503, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.0, |
|
"loss": 1.0194, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.0933133363723755, |
|
"eval_runtime": 1.2839, |
|
"eval_samples_per_second": 43.616, |
|
"eval_steps_per_second": 5.452, |
|
"step": 550 |
|
} |
|
], |
|
"max_steps": 600, |
|
"num_train_epochs": 12, |
|
"total_flos": 572229550080000.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|