|
{ |
|
"best_metric": 0.6892715692520142, |
|
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/mnli/checkpoint-13400", |
|
"epoch": 3.793103448275862, |
|
"global_step": 15400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.453520268201828, |
|
"eval_loss": 1.0527480840682983, |
|
"eval_runtime": 13.8459, |
|
"eval_samples_per_second": 473.93, |
|
"eval_steps_per_second": 59.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.5099055171012878, |
|
"eval_loss": 0.9751997590065002, |
|
"eval_runtime": 13.8646, |
|
"eval_samples_per_second": 473.293, |
|
"eval_steps_per_second": 59.216, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.938423645320197e-05, |
|
"loss": 1.036, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.5175251364707947, |
|
"eval_loss": 0.9599342942237854, |
|
"eval_runtime": 13.9722, |
|
"eval_samples_per_second": 469.648, |
|
"eval_steps_per_second": 58.76, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.5405364036560059, |
|
"eval_loss": 0.9497725367546082, |
|
"eval_runtime": 13.977, |
|
"eval_samples_per_second": 469.486, |
|
"eval_steps_per_second": 58.739, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.876847290640394e-05, |
|
"loss": 0.9512, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.5626333355903625, |
|
"eval_loss": 0.9136764407157898, |
|
"eval_runtime": 13.8916, |
|
"eval_samples_per_second": 472.373, |
|
"eval_steps_per_second": 59.101, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.5790917277336121, |
|
"eval_loss": 0.9000873565673828, |
|
"eval_runtime": 13.9157, |
|
"eval_samples_per_second": 471.554, |
|
"eval_steps_per_second": 58.998, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5789393186569214, |
|
"eval_loss": 0.8979566097259521, |
|
"eval_runtime": 13.9289, |
|
"eval_samples_per_second": 471.106, |
|
"eval_steps_per_second": 58.942, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.8152709359605915e-05, |
|
"loss": 0.9165, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5932642221450806, |
|
"eval_loss": 0.8788071870803833, |
|
"eval_runtime": 13.9191, |
|
"eval_samples_per_second": 471.437, |
|
"eval_steps_per_second": 58.984, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5729960203170776, |
|
"eval_loss": 0.9093856811523438, |
|
"eval_runtime": 13.9251, |
|
"eval_samples_per_second": 471.237, |
|
"eval_steps_per_second": 58.958, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.753694581280788e-05, |
|
"loss": 0.8915, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5684242844581604, |
|
"eval_loss": 0.8900429606437683, |
|
"eval_runtime": 13.875, |
|
"eval_samples_per_second": 472.938, |
|
"eval_steps_per_second": 59.171, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.606217622756958, |
|
"eval_loss": 0.8568419218063354, |
|
"eval_runtime": 13.8846, |
|
"eval_samples_per_second": 472.611, |
|
"eval_steps_per_second": 59.13, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.6046937108039856, |
|
"eval_loss": 0.8561736345291138, |
|
"eval_runtime": 13.8666, |
|
"eval_samples_per_second": 473.223, |
|
"eval_steps_per_second": 59.207, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.6921182266009855e-05, |
|
"loss": 0.873, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.6234380006790161, |
|
"eval_loss": 0.8509392142295837, |
|
"eval_runtime": 13.8669, |
|
"eval_samples_per_second": 473.214, |
|
"eval_steps_per_second": 59.206, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.6031697392463684, |
|
"eval_loss": 0.8599078059196472, |
|
"eval_runtime": 13.8611, |
|
"eval_samples_per_second": 473.412, |
|
"eval_steps_per_second": 59.231, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.630541871921182e-05, |
|
"loss": 0.861, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.6165803074836731, |
|
"eval_loss": 0.8439797759056091, |
|
"eval_runtime": 13.8579, |
|
"eval_samples_per_second": 473.522, |
|
"eval_steps_per_second": 59.244, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.6261810660362244, |
|
"eval_loss": 0.8294622898101807, |
|
"eval_runtime": 13.8888, |
|
"eval_samples_per_second": 472.468, |
|
"eval_steps_per_second": 59.113, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.6272478103637695, |
|
"eval_loss": 0.8390009999275208, |
|
"eval_runtime": 13.8984, |
|
"eval_samples_per_second": 472.14, |
|
"eval_steps_per_second": 59.072, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.5689655172413794e-05, |
|
"loss": 0.8449, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.6289241313934326, |
|
"eval_loss": 0.8257491588592529, |
|
"eval_runtime": 13.9296, |
|
"eval_samples_per_second": 471.084, |
|
"eval_steps_per_second": 58.939, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.6309052109718323, |
|
"eval_loss": 0.81971675157547, |
|
"eval_runtime": 13.9569, |
|
"eval_samples_per_second": 470.163, |
|
"eval_steps_per_second": 58.824, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.507389162561577e-05, |
|
"loss": 0.8409, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.6334958672523499, |
|
"eval_loss": 0.8270503878593445, |
|
"eval_runtime": 13.9849, |
|
"eval_samples_per_second": 469.221, |
|
"eval_steps_per_second": 58.706, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_accuracy": 0.6324291229248047, |
|
"eval_loss": 0.8238465785980225, |
|
"eval_runtime": 13.986, |
|
"eval_samples_per_second": 469.184, |
|
"eval_steps_per_second": 58.702, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_accuracy": 0.6322767734527588, |
|
"eval_loss": 0.8234522342681885, |
|
"eval_runtime": 13.9817, |
|
"eval_samples_per_second": 469.329, |
|
"eval_steps_per_second": 58.72, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.4458128078817734e-05, |
|
"loss": 0.7883, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_accuracy": 0.6405059695243835, |
|
"eval_loss": 0.8090473413467407, |
|
"eval_runtime": 13.9835, |
|
"eval_samples_per_second": 469.267, |
|
"eval_steps_per_second": 58.712, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.635019838809967, |
|
"eval_loss": 0.8067367076873779, |
|
"eval_runtime": 13.9898, |
|
"eval_samples_per_second": 469.056, |
|
"eval_steps_per_second": 58.686, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 4.384236453201971e-05, |
|
"loss": 0.7789, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_accuracy": 0.6330386996269226, |
|
"eval_loss": 0.8147866129875183, |
|
"eval_runtime": 13.941, |
|
"eval_samples_per_second": 470.699, |
|
"eval_steps_per_second": 58.891, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_accuracy": 0.6482779383659363, |
|
"eval_loss": 0.7999472618103027, |
|
"eval_runtime": 13.8888, |
|
"eval_samples_per_second": 472.466, |
|
"eval_steps_per_second": 59.112, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.6427918076515198, |
|
"eval_loss": 0.8316212296485901, |
|
"eval_runtime": 13.8743, |
|
"eval_samples_per_second": 472.96, |
|
"eval_steps_per_second": 59.174, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.3226600985221674e-05, |
|
"loss": 0.7644, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.6462968587875366, |
|
"eval_loss": 0.8003305196762085, |
|
"eval_runtime": 13.8974, |
|
"eval_samples_per_second": 472.173, |
|
"eval_steps_per_second": 59.076, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.6531545519828796, |
|
"eval_loss": 0.7893626689910889, |
|
"eval_runtime": 13.9226, |
|
"eval_samples_per_second": 471.319, |
|
"eval_steps_per_second": 58.969, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 4.261083743842365e-05, |
|
"loss": 0.7648, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.6549832224845886, |
|
"eval_loss": 0.7921696305274963, |
|
"eval_runtime": 13.9392, |
|
"eval_samples_per_second": 470.76, |
|
"eval_steps_per_second": 58.899, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.6469064354896545, |
|
"eval_loss": 0.8024189472198486, |
|
"eval_runtime": 13.9187, |
|
"eval_samples_per_second": 471.45, |
|
"eval_steps_per_second": 58.985, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_accuracy": 0.6566595435142517, |
|
"eval_loss": 0.7816490530967712, |
|
"eval_runtime": 13.9487, |
|
"eval_samples_per_second": 470.437, |
|
"eval_steps_per_second": 58.858, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.199507389162562e-05, |
|
"loss": 0.765, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.6543736457824707, |
|
"eval_loss": 0.7835971117019653, |
|
"eval_runtime": 13.8588, |
|
"eval_samples_per_second": 473.491, |
|
"eval_steps_per_second": 59.24, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.6411154866218567, |
|
"eval_loss": 0.8067611455917358, |
|
"eval_runtime": 13.8687, |
|
"eval_samples_per_second": 473.153, |
|
"eval_steps_per_second": 59.198, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1379310344827587e-05, |
|
"loss": 0.7583, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_accuracy": 0.6603170037269592, |
|
"eval_loss": 0.7717912197113037, |
|
"eval_runtime": 13.8664, |
|
"eval_samples_per_second": 473.231, |
|
"eval_steps_per_second": 59.208, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.6635172367095947, |
|
"eval_loss": 0.7803527116775513, |
|
"eval_runtime": 13.8557, |
|
"eval_samples_per_second": 473.596, |
|
"eval_steps_per_second": 59.254, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy": 0.6674794554710388, |
|
"eval_loss": 0.7677510380744934, |
|
"eval_runtime": 13.8768, |
|
"eval_samples_per_second": 472.875, |
|
"eval_steps_per_second": 59.163, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.076354679802955e-05, |
|
"loss": 0.7584, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.6589454412460327, |
|
"eval_loss": 0.7867729067802429, |
|
"eval_runtime": 13.9787, |
|
"eval_samples_per_second": 469.43, |
|
"eval_steps_per_second": 58.732, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_accuracy": 0.6604693531990051, |
|
"eval_loss": 0.7763269543647766, |
|
"eval_runtime": 13.9708, |
|
"eval_samples_per_second": 469.693, |
|
"eval_steps_per_second": 58.765, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.014778325123153e-05, |
|
"loss": 0.7515, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.6673270463943481, |
|
"eval_loss": 0.7527127265930176, |
|
"eval_runtime": 13.9175, |
|
"eval_samples_per_second": 471.491, |
|
"eval_steps_per_second": 58.99, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.6624504923820496, |
|
"eval_loss": 0.8022358417510986, |
|
"eval_runtime": 13.9295, |
|
"eval_samples_per_second": 471.086, |
|
"eval_steps_per_second": 58.94, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.6630600690841675, |
|
"eval_loss": 0.7974384427070618, |
|
"eval_runtime": 13.9703, |
|
"eval_samples_per_second": 469.711, |
|
"eval_steps_per_second": 58.768, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.95320197044335e-05, |
|
"loss": 0.6779, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.6708320379257202, |
|
"eval_loss": 0.768686830997467, |
|
"eval_runtime": 13.9807, |
|
"eval_samples_per_second": 469.361, |
|
"eval_steps_per_second": 58.724, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.6661078929901123, |
|
"eval_loss": 0.7770901322364807, |
|
"eval_runtime": 13.9743, |
|
"eval_samples_per_second": 469.578, |
|
"eval_steps_per_second": 58.751, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.891625615763547e-05, |
|
"loss": 0.6587, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.6674794554710388, |
|
"eval_loss": 0.7751796245574951, |
|
"eval_runtime": 13.9722, |
|
"eval_samples_per_second": 469.647, |
|
"eval_steps_per_second": 58.76, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_accuracy": 0.6688509583473206, |
|
"eval_loss": 0.7814744114875793, |
|
"eval_runtime": 13.9424, |
|
"eval_samples_per_second": 470.652, |
|
"eval_steps_per_second": 58.885, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_accuracy": 0.6693081259727478, |
|
"eval_loss": 0.7871124744415283, |
|
"eval_runtime": 13.9317, |
|
"eval_samples_per_second": 471.012, |
|
"eval_steps_per_second": 58.93, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 3.830049261083744e-05, |
|
"loss": 0.6662, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.6717464327812195, |
|
"eval_loss": 0.768136203289032, |
|
"eval_runtime": 13.9811, |
|
"eval_samples_per_second": 469.347, |
|
"eval_steps_per_second": 58.722, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.6755562424659729, |
|
"eval_loss": 0.7916134595870972, |
|
"eval_runtime": 13.9048, |
|
"eval_samples_per_second": 471.925, |
|
"eval_steps_per_second": 59.045, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 3.768472906403941e-05, |
|
"loss": 0.6585, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_accuracy": 0.6796708106994629, |
|
"eval_loss": 0.7765262722969055, |
|
"eval_runtime": 13.8808, |
|
"eval_samples_per_second": 472.74, |
|
"eval_steps_per_second": 59.146, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.6746419072151184, |
|
"eval_loss": 0.7696249485015869, |
|
"eval_runtime": 13.8943, |
|
"eval_samples_per_second": 472.28, |
|
"eval_steps_per_second": 59.089, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": 0.6705272793769836, |
|
"eval_loss": 0.795970618724823, |
|
"eval_runtime": 13.923, |
|
"eval_samples_per_second": 471.308, |
|
"eval_steps_per_second": 58.967, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.7068965517241385e-05, |
|
"loss": 0.6648, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.681347131729126, |
|
"eval_loss": 0.7532095909118652, |
|
"eval_runtime": 13.9308, |
|
"eval_samples_per_second": 471.042, |
|
"eval_steps_per_second": 58.934, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_accuracy": 0.6834806203842163, |
|
"eval_loss": 0.7632550001144409, |
|
"eval_runtime": 13.9049, |
|
"eval_samples_per_second": 471.921, |
|
"eval_steps_per_second": 59.044, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 3.645320197044335e-05, |
|
"loss": 0.6663, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 0.6773849725723267, |
|
"eval_loss": 0.7755422592163086, |
|
"eval_runtime": 13.8928, |
|
"eval_samples_per_second": 472.331, |
|
"eval_steps_per_second": 59.095, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_accuracy": 0.6784517168998718, |
|
"eval_loss": 0.7512595653533936, |
|
"eval_runtime": 13.8668, |
|
"eval_samples_per_second": 473.218, |
|
"eval_steps_per_second": 59.206, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.6869856715202332, |
|
"eval_loss": 0.7553817629814148, |
|
"eval_runtime": 13.8821, |
|
"eval_samples_per_second": 472.693, |
|
"eval_steps_per_second": 59.141, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.583743842364532e-05, |
|
"loss": 0.6645, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_accuracy": 0.6833282709121704, |
|
"eval_loss": 0.7605084776878357, |
|
"eval_runtime": 13.8864, |
|
"eval_samples_per_second": 472.548, |
|
"eval_steps_per_second": 59.122, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_accuracy": 0.681347131729126, |
|
"eval_loss": 0.7520666718482971, |
|
"eval_runtime": 13.9251, |
|
"eval_samples_per_second": 471.236, |
|
"eval_steps_per_second": 58.958, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 3.522167487684729e-05, |
|
"loss": 0.6596, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.6738799214363098, |
|
"eval_loss": 0.7592176795005798, |
|
"eval_runtime": 13.9705, |
|
"eval_samples_per_second": 469.705, |
|
"eval_steps_per_second": 58.767, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6863760948181152, |
|
"eval_loss": 0.7859818935394287, |
|
"eval_runtime": 13.9639, |
|
"eval_samples_per_second": 469.927, |
|
"eval_steps_per_second": 58.795, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_accuracy": 0.6819567084312439, |
|
"eval_loss": 0.7805635929107666, |
|
"eval_runtime": 13.8834, |
|
"eval_samples_per_second": 472.651, |
|
"eval_steps_per_second": 59.135, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.4605911330049265e-05, |
|
"loss": 0.5974, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.6827186942100525, |
|
"eval_loss": 0.8164608478546143, |
|
"eval_runtime": 13.879, |
|
"eval_samples_per_second": 472.8, |
|
"eval_steps_per_second": 59.154, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_accuracy": 0.6831758618354797, |
|
"eval_loss": 0.7926530838012695, |
|
"eval_runtime": 13.8407, |
|
"eval_samples_per_second": 474.109, |
|
"eval_steps_per_second": 59.318, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 3.399014778325123e-05, |
|
"loss": 0.5539, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.6781468987464905, |
|
"eval_loss": 0.8301470279693604, |
|
"eval_runtime": 13.8501, |
|
"eval_samples_per_second": 473.786, |
|
"eval_steps_per_second": 59.277, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.6764705777168274, |
|
"eval_loss": 0.8108323812484741, |
|
"eval_runtime": 13.8741, |
|
"eval_samples_per_second": 472.969, |
|
"eval_steps_per_second": 59.175, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.6892715692520142, |
|
"eval_loss": 0.8093796968460083, |
|
"eval_runtime": 13.8736, |
|
"eval_samples_per_second": 472.983, |
|
"eval_steps_per_second": 59.177, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.3374384236453204e-05, |
|
"loss": 0.5556, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"eval_accuracy": 0.6766229867935181, |
|
"eval_loss": 0.8061802983283997, |
|
"eval_runtime": 13.8439, |
|
"eval_samples_per_second": 474.0, |
|
"eval_steps_per_second": 59.304, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.6857665181159973, |
|
"eval_loss": 0.8102853894233704, |
|
"eval_runtime": 13.8952, |
|
"eval_samples_per_second": 472.25, |
|
"eval_steps_per_second": 59.085, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 3.275862068965517e-05, |
|
"loss": 0.561, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"eval_accuracy": 0.6811947822570801, |
|
"eval_loss": 0.7732057571411133, |
|
"eval_runtime": 13.9222, |
|
"eval_samples_per_second": 471.335, |
|
"eval_steps_per_second": 58.971, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.6784517168998718, |
|
"eval_loss": 0.8165723085403442, |
|
"eval_runtime": 13.9595, |
|
"eval_samples_per_second": 470.073, |
|
"eval_steps_per_second": 58.813, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_accuracy": 0.6802803874015808, |
|
"eval_loss": 0.807296097278595, |
|
"eval_runtime": 14.0128, |
|
"eval_samples_per_second": 468.288, |
|
"eval_steps_per_second": 58.589, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 0.5708, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.6888144016265869, |
|
"eval_loss": 0.796098530292511, |
|
"eval_runtime": 13.9315, |
|
"eval_samples_per_second": 471.019, |
|
"eval_steps_per_second": 58.931, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_accuracy": 0.6831758618354797, |
|
"eval_loss": 0.7848635315895081, |
|
"eval_runtime": 13.9748, |
|
"eval_samples_per_second": 469.561, |
|
"eval_steps_per_second": 58.749, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.152709359605912e-05, |
|
"loss": 0.5664, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.6843950152397156, |
|
"eval_loss": 0.8060910105705261, |
|
"eval_runtime": 13.9727, |
|
"eval_samples_per_second": 469.631, |
|
"eval_steps_per_second": 58.758, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_accuracy": 0.6804327964782715, |
|
"eval_loss": 0.7997169494628906, |
|
"eval_runtime": 13.9901, |
|
"eval_samples_per_second": 469.045, |
|
"eval_steps_per_second": 58.684, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_accuracy": 0.6877476572990417, |
|
"eval_loss": 0.7792339324951172, |
|
"eval_runtime": 13.9337, |
|
"eval_samples_per_second": 470.946, |
|
"eval_steps_per_second": 58.922, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"step": 15400, |
|
"total_flos": 9.026901710390784e+16, |
|
"train_loss": 0.7314797597117239, |
|
"train_runtime": 6341.8601, |
|
"train_samples_per_second": 409.627, |
|
"train_steps_per_second": 6.402 |
|
} |
|
], |
|
"max_steps": 40600, |
|
"num_train_epochs": 10, |
|
"total_flos": 9.026901710390784e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|