structroberta_sx_final / finetune /mnli /trainer_state.json
Omar
update
88cccb3
raw
history blame
23.3 kB
{
"best_metric": 0.6892715692520142,
"best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/mnli/checkpoint-13400",
"epoch": 3.793103448275862,
"global_step": 15400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"eval_accuracy": 0.453520268201828,
"eval_loss": 1.0527480840682983,
"eval_runtime": 13.8459,
"eval_samples_per_second": 473.93,
"eval_steps_per_second": 59.295,
"step": 200
},
{
"epoch": 0.1,
"eval_accuracy": 0.5099055171012878,
"eval_loss": 0.9751997590065002,
"eval_runtime": 13.8646,
"eval_samples_per_second": 473.293,
"eval_steps_per_second": 59.216,
"step": 400
},
{
"epoch": 0.12,
"learning_rate": 4.938423645320197e-05,
"loss": 1.036,
"step": 500
},
{
"epoch": 0.15,
"eval_accuracy": 0.5175251364707947,
"eval_loss": 0.9599342942237854,
"eval_runtime": 13.9722,
"eval_samples_per_second": 469.648,
"eval_steps_per_second": 58.76,
"step": 600
},
{
"epoch": 0.2,
"eval_accuracy": 0.5405364036560059,
"eval_loss": 0.9497725367546082,
"eval_runtime": 13.977,
"eval_samples_per_second": 469.486,
"eval_steps_per_second": 58.739,
"step": 800
},
{
"epoch": 0.25,
"learning_rate": 4.876847290640394e-05,
"loss": 0.9512,
"step": 1000
},
{
"epoch": 0.25,
"eval_accuracy": 0.5626333355903625,
"eval_loss": 0.9136764407157898,
"eval_runtime": 13.8916,
"eval_samples_per_second": 472.373,
"eval_steps_per_second": 59.101,
"step": 1000
},
{
"epoch": 0.3,
"eval_accuracy": 0.5790917277336121,
"eval_loss": 0.9000873565673828,
"eval_runtime": 13.9157,
"eval_samples_per_second": 471.554,
"eval_steps_per_second": 58.998,
"step": 1200
},
{
"epoch": 0.34,
"eval_accuracy": 0.5789393186569214,
"eval_loss": 0.8979566097259521,
"eval_runtime": 13.9289,
"eval_samples_per_second": 471.106,
"eval_steps_per_second": 58.942,
"step": 1400
},
{
"epoch": 0.37,
"learning_rate": 4.8152709359605915e-05,
"loss": 0.9165,
"step": 1500
},
{
"epoch": 0.39,
"eval_accuracy": 0.5932642221450806,
"eval_loss": 0.8788071870803833,
"eval_runtime": 13.9191,
"eval_samples_per_second": 471.437,
"eval_steps_per_second": 58.984,
"step": 1600
},
{
"epoch": 0.44,
"eval_accuracy": 0.5729960203170776,
"eval_loss": 0.9093856811523438,
"eval_runtime": 13.9251,
"eval_samples_per_second": 471.237,
"eval_steps_per_second": 58.958,
"step": 1800
},
{
"epoch": 0.49,
"learning_rate": 4.753694581280788e-05,
"loss": 0.8915,
"step": 2000
},
{
"epoch": 0.49,
"eval_accuracy": 0.5684242844581604,
"eval_loss": 0.8900429606437683,
"eval_runtime": 13.875,
"eval_samples_per_second": 472.938,
"eval_steps_per_second": 59.171,
"step": 2000
},
{
"epoch": 0.54,
"eval_accuracy": 0.606217622756958,
"eval_loss": 0.8568419218063354,
"eval_runtime": 13.8846,
"eval_samples_per_second": 472.611,
"eval_steps_per_second": 59.13,
"step": 2200
},
{
"epoch": 0.59,
"eval_accuracy": 0.6046937108039856,
"eval_loss": 0.8561736345291138,
"eval_runtime": 13.8666,
"eval_samples_per_second": 473.223,
"eval_steps_per_second": 59.207,
"step": 2400
},
{
"epoch": 0.62,
"learning_rate": 4.6921182266009855e-05,
"loss": 0.873,
"step": 2500
},
{
"epoch": 0.64,
"eval_accuracy": 0.6234380006790161,
"eval_loss": 0.8509392142295837,
"eval_runtime": 13.8669,
"eval_samples_per_second": 473.214,
"eval_steps_per_second": 59.206,
"step": 2600
},
{
"epoch": 0.69,
"eval_accuracy": 0.6031697392463684,
"eval_loss": 0.8599078059196472,
"eval_runtime": 13.8611,
"eval_samples_per_second": 473.412,
"eval_steps_per_second": 59.231,
"step": 2800
},
{
"epoch": 0.74,
"learning_rate": 4.630541871921182e-05,
"loss": 0.861,
"step": 3000
},
{
"epoch": 0.74,
"eval_accuracy": 0.6165803074836731,
"eval_loss": 0.8439797759056091,
"eval_runtime": 13.8579,
"eval_samples_per_second": 473.522,
"eval_steps_per_second": 59.244,
"step": 3000
},
{
"epoch": 0.79,
"eval_accuracy": 0.6261810660362244,
"eval_loss": 0.8294622898101807,
"eval_runtime": 13.8888,
"eval_samples_per_second": 472.468,
"eval_steps_per_second": 59.113,
"step": 3200
},
{
"epoch": 0.84,
"eval_accuracy": 0.6272478103637695,
"eval_loss": 0.8390009999275208,
"eval_runtime": 13.8984,
"eval_samples_per_second": 472.14,
"eval_steps_per_second": 59.072,
"step": 3400
},
{
"epoch": 0.86,
"learning_rate": 4.5689655172413794e-05,
"loss": 0.8449,
"step": 3500
},
{
"epoch": 0.89,
"eval_accuracy": 0.6289241313934326,
"eval_loss": 0.8257491588592529,
"eval_runtime": 13.9296,
"eval_samples_per_second": 471.084,
"eval_steps_per_second": 58.939,
"step": 3600
},
{
"epoch": 0.94,
"eval_accuracy": 0.6309052109718323,
"eval_loss": 0.81971675157547,
"eval_runtime": 13.9569,
"eval_samples_per_second": 470.163,
"eval_steps_per_second": 58.824,
"step": 3800
},
{
"epoch": 0.99,
"learning_rate": 4.507389162561577e-05,
"loss": 0.8409,
"step": 4000
},
{
"epoch": 0.99,
"eval_accuracy": 0.6334958672523499,
"eval_loss": 0.8270503878593445,
"eval_runtime": 13.9849,
"eval_samples_per_second": 469.221,
"eval_steps_per_second": 58.706,
"step": 4000
},
{
"epoch": 1.03,
"eval_accuracy": 0.6324291229248047,
"eval_loss": 0.8238465785980225,
"eval_runtime": 13.986,
"eval_samples_per_second": 469.184,
"eval_steps_per_second": 58.702,
"step": 4200
},
{
"epoch": 1.08,
"eval_accuracy": 0.6322767734527588,
"eval_loss": 0.8234522342681885,
"eval_runtime": 13.9817,
"eval_samples_per_second": 469.329,
"eval_steps_per_second": 58.72,
"step": 4400
},
{
"epoch": 1.11,
"learning_rate": 4.4458128078817734e-05,
"loss": 0.7883,
"step": 4500
},
{
"epoch": 1.13,
"eval_accuracy": 0.6405059695243835,
"eval_loss": 0.8090473413467407,
"eval_runtime": 13.9835,
"eval_samples_per_second": 469.267,
"eval_steps_per_second": 58.712,
"step": 4600
},
{
"epoch": 1.18,
"eval_accuracy": 0.635019838809967,
"eval_loss": 0.8067367076873779,
"eval_runtime": 13.9898,
"eval_samples_per_second": 469.056,
"eval_steps_per_second": 58.686,
"step": 4800
},
{
"epoch": 1.23,
"learning_rate": 4.384236453201971e-05,
"loss": 0.7789,
"step": 5000
},
{
"epoch": 1.23,
"eval_accuracy": 0.6330386996269226,
"eval_loss": 0.8147866129875183,
"eval_runtime": 13.941,
"eval_samples_per_second": 470.699,
"eval_steps_per_second": 58.891,
"step": 5000
},
{
"epoch": 1.28,
"eval_accuracy": 0.6482779383659363,
"eval_loss": 0.7999472618103027,
"eval_runtime": 13.8888,
"eval_samples_per_second": 472.466,
"eval_steps_per_second": 59.112,
"step": 5200
},
{
"epoch": 1.33,
"eval_accuracy": 0.6427918076515198,
"eval_loss": 0.8316212296485901,
"eval_runtime": 13.8743,
"eval_samples_per_second": 472.96,
"eval_steps_per_second": 59.174,
"step": 5400
},
{
"epoch": 1.35,
"learning_rate": 4.3226600985221674e-05,
"loss": 0.7644,
"step": 5500
},
{
"epoch": 1.38,
"eval_accuracy": 0.6462968587875366,
"eval_loss": 0.8003305196762085,
"eval_runtime": 13.8974,
"eval_samples_per_second": 472.173,
"eval_steps_per_second": 59.076,
"step": 5600
},
{
"epoch": 1.43,
"eval_accuracy": 0.6531545519828796,
"eval_loss": 0.7893626689910889,
"eval_runtime": 13.9226,
"eval_samples_per_second": 471.319,
"eval_steps_per_second": 58.969,
"step": 5800
},
{
"epoch": 1.48,
"learning_rate": 4.261083743842365e-05,
"loss": 0.7648,
"step": 6000
},
{
"epoch": 1.48,
"eval_accuracy": 0.6549832224845886,
"eval_loss": 0.7921696305274963,
"eval_runtime": 13.9392,
"eval_samples_per_second": 470.76,
"eval_steps_per_second": 58.899,
"step": 6000
},
{
"epoch": 1.53,
"eval_accuracy": 0.6469064354896545,
"eval_loss": 0.8024189472198486,
"eval_runtime": 13.9187,
"eval_samples_per_second": 471.45,
"eval_steps_per_second": 58.985,
"step": 6200
},
{
"epoch": 1.58,
"eval_accuracy": 0.6566595435142517,
"eval_loss": 0.7816490530967712,
"eval_runtime": 13.9487,
"eval_samples_per_second": 470.437,
"eval_steps_per_second": 58.858,
"step": 6400
},
{
"epoch": 1.6,
"learning_rate": 4.199507389162562e-05,
"loss": 0.765,
"step": 6500
},
{
"epoch": 1.63,
"eval_accuracy": 0.6543736457824707,
"eval_loss": 0.7835971117019653,
"eval_runtime": 13.8588,
"eval_samples_per_second": 473.491,
"eval_steps_per_second": 59.24,
"step": 6600
},
{
"epoch": 1.67,
"eval_accuracy": 0.6411154866218567,
"eval_loss": 0.8067611455917358,
"eval_runtime": 13.8687,
"eval_samples_per_second": 473.153,
"eval_steps_per_second": 59.198,
"step": 6800
},
{
"epoch": 1.72,
"learning_rate": 4.1379310344827587e-05,
"loss": 0.7583,
"step": 7000
},
{
"epoch": 1.72,
"eval_accuracy": 0.6603170037269592,
"eval_loss": 0.7717912197113037,
"eval_runtime": 13.8664,
"eval_samples_per_second": 473.231,
"eval_steps_per_second": 59.208,
"step": 7000
},
{
"epoch": 1.77,
"eval_accuracy": 0.6635172367095947,
"eval_loss": 0.7803527116775513,
"eval_runtime": 13.8557,
"eval_samples_per_second": 473.596,
"eval_steps_per_second": 59.254,
"step": 7200
},
{
"epoch": 1.82,
"eval_accuracy": 0.6674794554710388,
"eval_loss": 0.7677510380744934,
"eval_runtime": 13.8768,
"eval_samples_per_second": 472.875,
"eval_steps_per_second": 59.163,
"step": 7400
},
{
"epoch": 1.85,
"learning_rate": 4.076354679802955e-05,
"loss": 0.7584,
"step": 7500
},
{
"epoch": 1.87,
"eval_accuracy": 0.6589454412460327,
"eval_loss": 0.7867729067802429,
"eval_runtime": 13.9787,
"eval_samples_per_second": 469.43,
"eval_steps_per_second": 58.732,
"step": 7600
},
{
"epoch": 1.92,
"eval_accuracy": 0.6604693531990051,
"eval_loss": 0.7763269543647766,
"eval_runtime": 13.9708,
"eval_samples_per_second": 469.693,
"eval_steps_per_second": 58.765,
"step": 7800
},
{
"epoch": 1.97,
"learning_rate": 4.014778325123153e-05,
"loss": 0.7515,
"step": 8000
},
{
"epoch": 1.97,
"eval_accuracy": 0.6673270463943481,
"eval_loss": 0.7527127265930176,
"eval_runtime": 13.9175,
"eval_samples_per_second": 471.491,
"eval_steps_per_second": 58.99,
"step": 8000
},
{
"epoch": 2.02,
"eval_accuracy": 0.6624504923820496,
"eval_loss": 0.8022358417510986,
"eval_runtime": 13.9295,
"eval_samples_per_second": 471.086,
"eval_steps_per_second": 58.94,
"step": 8200
},
{
"epoch": 2.07,
"eval_accuracy": 0.6630600690841675,
"eval_loss": 0.7974384427070618,
"eval_runtime": 13.9703,
"eval_samples_per_second": 469.711,
"eval_steps_per_second": 58.768,
"step": 8400
},
{
"epoch": 2.09,
"learning_rate": 3.95320197044335e-05,
"loss": 0.6779,
"step": 8500
},
{
"epoch": 2.12,
"eval_accuracy": 0.6708320379257202,
"eval_loss": 0.768686830997467,
"eval_runtime": 13.9807,
"eval_samples_per_second": 469.361,
"eval_steps_per_second": 58.724,
"step": 8600
},
{
"epoch": 2.17,
"eval_accuracy": 0.6661078929901123,
"eval_loss": 0.7770901322364807,
"eval_runtime": 13.9743,
"eval_samples_per_second": 469.578,
"eval_steps_per_second": 58.751,
"step": 8800
},
{
"epoch": 2.22,
"learning_rate": 3.891625615763547e-05,
"loss": 0.6587,
"step": 9000
},
{
"epoch": 2.22,
"eval_accuracy": 0.6674794554710388,
"eval_loss": 0.7751796245574951,
"eval_runtime": 13.9722,
"eval_samples_per_second": 469.647,
"eval_steps_per_second": 58.76,
"step": 9000
},
{
"epoch": 2.27,
"eval_accuracy": 0.6688509583473206,
"eval_loss": 0.7814744114875793,
"eval_runtime": 13.9424,
"eval_samples_per_second": 470.652,
"eval_steps_per_second": 58.885,
"step": 9200
},
{
"epoch": 2.32,
"eval_accuracy": 0.6693081259727478,
"eval_loss": 0.7871124744415283,
"eval_runtime": 13.9317,
"eval_samples_per_second": 471.012,
"eval_steps_per_second": 58.93,
"step": 9400
},
{
"epoch": 2.34,
"learning_rate": 3.830049261083744e-05,
"loss": 0.6662,
"step": 9500
},
{
"epoch": 2.36,
"eval_accuracy": 0.6717464327812195,
"eval_loss": 0.768136203289032,
"eval_runtime": 13.9811,
"eval_samples_per_second": 469.347,
"eval_steps_per_second": 58.722,
"step": 9600
},
{
"epoch": 2.41,
"eval_accuracy": 0.6755562424659729,
"eval_loss": 0.7916134595870972,
"eval_runtime": 13.9048,
"eval_samples_per_second": 471.925,
"eval_steps_per_second": 59.045,
"step": 9800
},
{
"epoch": 2.46,
"learning_rate": 3.768472906403941e-05,
"loss": 0.6585,
"step": 10000
},
{
"epoch": 2.46,
"eval_accuracy": 0.6796708106994629,
"eval_loss": 0.7765262722969055,
"eval_runtime": 13.8808,
"eval_samples_per_second": 472.74,
"eval_steps_per_second": 59.146,
"step": 10000
},
{
"epoch": 2.51,
"eval_accuracy": 0.6746419072151184,
"eval_loss": 0.7696249485015869,
"eval_runtime": 13.8943,
"eval_samples_per_second": 472.28,
"eval_steps_per_second": 59.089,
"step": 10200
},
{
"epoch": 2.56,
"eval_accuracy": 0.6705272793769836,
"eval_loss": 0.795970618724823,
"eval_runtime": 13.923,
"eval_samples_per_second": 471.308,
"eval_steps_per_second": 58.967,
"step": 10400
},
{
"epoch": 2.59,
"learning_rate": 3.7068965517241385e-05,
"loss": 0.6648,
"step": 10500
},
{
"epoch": 2.61,
"eval_accuracy": 0.681347131729126,
"eval_loss": 0.7532095909118652,
"eval_runtime": 13.9308,
"eval_samples_per_second": 471.042,
"eval_steps_per_second": 58.934,
"step": 10600
},
{
"epoch": 2.66,
"eval_accuracy": 0.6834806203842163,
"eval_loss": 0.7632550001144409,
"eval_runtime": 13.9049,
"eval_samples_per_second": 471.921,
"eval_steps_per_second": 59.044,
"step": 10800
},
{
"epoch": 2.71,
"learning_rate": 3.645320197044335e-05,
"loss": 0.6663,
"step": 11000
},
{
"epoch": 2.71,
"eval_accuracy": 0.6773849725723267,
"eval_loss": 0.7755422592163086,
"eval_runtime": 13.8928,
"eval_samples_per_second": 472.331,
"eval_steps_per_second": 59.095,
"step": 11000
},
{
"epoch": 2.76,
"eval_accuracy": 0.6784517168998718,
"eval_loss": 0.7512595653533936,
"eval_runtime": 13.8668,
"eval_samples_per_second": 473.218,
"eval_steps_per_second": 59.206,
"step": 11200
},
{
"epoch": 2.81,
"eval_accuracy": 0.6869856715202332,
"eval_loss": 0.7553817629814148,
"eval_runtime": 13.8821,
"eval_samples_per_second": 472.693,
"eval_steps_per_second": 59.141,
"step": 11400
},
{
"epoch": 2.83,
"learning_rate": 3.583743842364532e-05,
"loss": 0.6645,
"step": 11500
},
{
"epoch": 2.86,
"eval_accuracy": 0.6833282709121704,
"eval_loss": 0.7605084776878357,
"eval_runtime": 13.8864,
"eval_samples_per_second": 472.548,
"eval_steps_per_second": 59.122,
"step": 11600
},
{
"epoch": 2.91,
"eval_accuracy": 0.681347131729126,
"eval_loss": 0.7520666718482971,
"eval_runtime": 13.9251,
"eval_samples_per_second": 471.236,
"eval_steps_per_second": 58.958,
"step": 11800
},
{
"epoch": 2.96,
"learning_rate": 3.522167487684729e-05,
"loss": 0.6596,
"step": 12000
},
{
"epoch": 2.96,
"eval_accuracy": 0.6738799214363098,
"eval_loss": 0.7592176795005798,
"eval_runtime": 13.9705,
"eval_samples_per_second": 469.705,
"eval_steps_per_second": 58.767,
"step": 12000
},
{
"epoch": 3.0,
"eval_accuracy": 0.6863760948181152,
"eval_loss": 0.7859818935394287,
"eval_runtime": 13.9639,
"eval_samples_per_second": 469.927,
"eval_steps_per_second": 58.795,
"step": 12200
},
{
"epoch": 3.05,
"eval_accuracy": 0.6819567084312439,
"eval_loss": 0.7805635929107666,
"eval_runtime": 13.8834,
"eval_samples_per_second": 472.651,
"eval_steps_per_second": 59.135,
"step": 12400
},
{
"epoch": 3.08,
"learning_rate": 3.4605911330049265e-05,
"loss": 0.5974,
"step": 12500
},
{
"epoch": 3.1,
"eval_accuracy": 0.6827186942100525,
"eval_loss": 0.8164608478546143,
"eval_runtime": 13.879,
"eval_samples_per_second": 472.8,
"eval_steps_per_second": 59.154,
"step": 12600
},
{
"epoch": 3.15,
"eval_accuracy": 0.6831758618354797,
"eval_loss": 0.7926530838012695,
"eval_runtime": 13.8407,
"eval_samples_per_second": 474.109,
"eval_steps_per_second": 59.318,
"step": 12800
},
{
"epoch": 3.2,
"learning_rate": 3.399014778325123e-05,
"loss": 0.5539,
"step": 13000
},
{
"epoch": 3.2,
"eval_accuracy": 0.6781468987464905,
"eval_loss": 0.8301470279693604,
"eval_runtime": 13.8501,
"eval_samples_per_second": 473.786,
"eval_steps_per_second": 59.277,
"step": 13000
},
{
"epoch": 3.25,
"eval_accuracy": 0.6764705777168274,
"eval_loss": 0.8108323812484741,
"eval_runtime": 13.8741,
"eval_samples_per_second": 472.969,
"eval_steps_per_second": 59.175,
"step": 13200
},
{
"epoch": 3.3,
"eval_accuracy": 0.6892715692520142,
"eval_loss": 0.8093796968460083,
"eval_runtime": 13.8736,
"eval_samples_per_second": 472.983,
"eval_steps_per_second": 59.177,
"step": 13400
},
{
"epoch": 3.33,
"learning_rate": 3.3374384236453204e-05,
"loss": 0.5556,
"step": 13500
},
{
"epoch": 3.35,
"eval_accuracy": 0.6766229867935181,
"eval_loss": 0.8061802983283997,
"eval_runtime": 13.8439,
"eval_samples_per_second": 474.0,
"eval_steps_per_second": 59.304,
"step": 13600
},
{
"epoch": 3.4,
"eval_accuracy": 0.6857665181159973,
"eval_loss": 0.8102853894233704,
"eval_runtime": 13.8952,
"eval_samples_per_second": 472.25,
"eval_steps_per_second": 59.085,
"step": 13800
},
{
"epoch": 3.45,
"learning_rate": 3.275862068965517e-05,
"loss": 0.561,
"step": 14000
},
{
"epoch": 3.45,
"eval_accuracy": 0.6811947822570801,
"eval_loss": 0.7732057571411133,
"eval_runtime": 13.9222,
"eval_samples_per_second": 471.335,
"eval_steps_per_second": 58.971,
"step": 14000
},
{
"epoch": 3.5,
"eval_accuracy": 0.6784517168998718,
"eval_loss": 0.8165723085403442,
"eval_runtime": 13.9595,
"eval_samples_per_second": 470.073,
"eval_steps_per_second": 58.813,
"step": 14200
},
{
"epoch": 3.55,
"eval_accuracy": 0.6802803874015808,
"eval_loss": 0.807296097278595,
"eval_runtime": 14.0128,
"eval_samples_per_second": 468.288,
"eval_steps_per_second": 58.589,
"step": 14400
},
{
"epoch": 3.57,
"learning_rate": 3.2142857142857144e-05,
"loss": 0.5708,
"step": 14500
},
{
"epoch": 3.6,
"eval_accuracy": 0.6888144016265869,
"eval_loss": 0.796098530292511,
"eval_runtime": 13.9315,
"eval_samples_per_second": 471.019,
"eval_steps_per_second": 58.931,
"step": 14600
},
{
"epoch": 3.65,
"eval_accuracy": 0.6831758618354797,
"eval_loss": 0.7848635315895081,
"eval_runtime": 13.9748,
"eval_samples_per_second": 469.561,
"eval_steps_per_second": 58.749,
"step": 14800
},
{
"epoch": 3.69,
"learning_rate": 3.152709359605912e-05,
"loss": 0.5664,
"step": 15000
},
{
"epoch": 3.69,
"eval_accuracy": 0.6843950152397156,
"eval_loss": 0.8060910105705261,
"eval_runtime": 13.9727,
"eval_samples_per_second": 469.631,
"eval_steps_per_second": 58.758,
"step": 15000
},
{
"epoch": 3.74,
"eval_accuracy": 0.6804327964782715,
"eval_loss": 0.7997169494628906,
"eval_runtime": 13.9901,
"eval_samples_per_second": 469.045,
"eval_steps_per_second": 58.684,
"step": 15200
},
{
"epoch": 3.79,
"eval_accuracy": 0.6877476572990417,
"eval_loss": 0.7792339324951172,
"eval_runtime": 13.9337,
"eval_samples_per_second": 470.946,
"eval_steps_per_second": 58.922,
"step": 15400
},
{
"epoch": 3.79,
"step": 15400,
"total_flos": 9.026901710390784e+16,
"train_loss": 0.7314797597117239,
"train_runtime": 6341.8601,
"train_samples_per_second": 409.627,
"train_steps_per_second": 6.402
}
],
"max_steps": 40600,
"num_train_epochs": 10,
"total_flos": 9.026901710390784e+16,
"trial_name": null,
"trial_params": null
}