{ "best_metric": 0.6892715692520142, "best_model_checkpoint": "final_models/glue_models/structroberta_s2_50ep//finetune/mnli/checkpoint-13400", "epoch": 3.793103448275862, "global_step": 15400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_accuracy": 0.453520268201828, "eval_loss": 1.0527480840682983, "eval_runtime": 13.8459, "eval_samples_per_second": 473.93, "eval_steps_per_second": 59.295, "step": 200 }, { "epoch": 0.1, "eval_accuracy": 0.5099055171012878, "eval_loss": 0.9751997590065002, "eval_runtime": 13.8646, "eval_samples_per_second": 473.293, "eval_steps_per_second": 59.216, "step": 400 }, { "epoch": 0.12, "learning_rate": 4.938423645320197e-05, "loss": 1.036, "step": 500 }, { "epoch": 0.15, "eval_accuracy": 0.5175251364707947, "eval_loss": 0.9599342942237854, "eval_runtime": 13.9722, "eval_samples_per_second": 469.648, "eval_steps_per_second": 58.76, "step": 600 }, { "epoch": 0.2, "eval_accuracy": 0.5405364036560059, "eval_loss": 0.9497725367546082, "eval_runtime": 13.977, "eval_samples_per_second": 469.486, "eval_steps_per_second": 58.739, "step": 800 }, { "epoch": 0.25, "learning_rate": 4.876847290640394e-05, "loss": 0.9512, "step": 1000 }, { "epoch": 0.25, "eval_accuracy": 0.5626333355903625, "eval_loss": 0.9136764407157898, "eval_runtime": 13.8916, "eval_samples_per_second": 472.373, "eval_steps_per_second": 59.101, "step": 1000 }, { "epoch": 0.3, "eval_accuracy": 0.5790917277336121, "eval_loss": 0.9000873565673828, "eval_runtime": 13.9157, "eval_samples_per_second": 471.554, "eval_steps_per_second": 58.998, "step": 1200 }, { "epoch": 0.34, "eval_accuracy": 0.5789393186569214, "eval_loss": 0.8979566097259521, "eval_runtime": 13.9289, "eval_samples_per_second": 471.106, "eval_steps_per_second": 58.942, "step": 1400 }, { "epoch": 0.37, "learning_rate": 4.8152709359605915e-05, "loss": 0.9165, "step": 1500 }, { "epoch": 0.39, "eval_accuracy": 0.5932642221450806, "eval_loss": 0.8788071870803833, "eval_runtime": 13.9191, "eval_samples_per_second": 471.437, "eval_steps_per_second": 58.984, "step": 1600 }, { "epoch": 0.44, "eval_accuracy": 0.5729960203170776, "eval_loss": 0.9093856811523438, "eval_runtime": 13.9251, "eval_samples_per_second": 471.237, "eval_steps_per_second": 58.958, "step": 1800 }, { "epoch": 0.49, "learning_rate": 4.753694581280788e-05, "loss": 0.8915, "step": 2000 }, { "epoch": 0.49, "eval_accuracy": 0.5684242844581604, "eval_loss": 0.8900429606437683, "eval_runtime": 13.875, "eval_samples_per_second": 472.938, "eval_steps_per_second": 59.171, "step": 2000 }, { "epoch": 0.54, "eval_accuracy": 0.606217622756958, "eval_loss": 0.8568419218063354, "eval_runtime": 13.8846, "eval_samples_per_second": 472.611, "eval_steps_per_second": 59.13, "step": 2200 }, { "epoch": 0.59, "eval_accuracy": 0.6046937108039856, "eval_loss": 0.8561736345291138, "eval_runtime": 13.8666, "eval_samples_per_second": 473.223, "eval_steps_per_second": 59.207, "step": 2400 }, { "epoch": 0.62, "learning_rate": 4.6921182266009855e-05, "loss": 0.873, "step": 2500 }, { "epoch": 0.64, "eval_accuracy": 0.6234380006790161, "eval_loss": 0.8509392142295837, "eval_runtime": 13.8669, "eval_samples_per_second": 473.214, "eval_steps_per_second": 59.206, "step": 2600 }, { "epoch": 0.69, "eval_accuracy": 0.6031697392463684, "eval_loss": 0.8599078059196472, "eval_runtime": 13.8611, "eval_samples_per_second": 473.412, "eval_steps_per_second": 59.231, "step": 2800 }, { "epoch": 0.74, "learning_rate": 4.630541871921182e-05, "loss": 0.861, "step": 3000 }, { "epoch": 0.74, "eval_accuracy": 0.6165803074836731, "eval_loss": 0.8439797759056091, "eval_runtime": 13.8579, "eval_samples_per_second": 473.522, "eval_steps_per_second": 59.244, "step": 3000 }, { "epoch": 0.79, "eval_accuracy": 0.6261810660362244, "eval_loss": 0.8294622898101807, "eval_runtime": 13.8888, "eval_samples_per_second": 472.468, "eval_steps_per_second": 59.113, "step": 3200 }, { "epoch": 0.84, "eval_accuracy": 0.6272478103637695, "eval_loss": 0.8390009999275208, "eval_runtime": 13.8984, "eval_samples_per_second": 472.14, "eval_steps_per_second": 59.072, "step": 3400 }, { "epoch": 0.86, "learning_rate": 4.5689655172413794e-05, "loss": 0.8449, "step": 3500 }, { "epoch": 0.89, "eval_accuracy": 0.6289241313934326, "eval_loss": 0.8257491588592529, "eval_runtime": 13.9296, "eval_samples_per_second": 471.084, "eval_steps_per_second": 58.939, "step": 3600 }, { "epoch": 0.94, "eval_accuracy": 0.6309052109718323, "eval_loss": 0.81971675157547, "eval_runtime": 13.9569, "eval_samples_per_second": 470.163, "eval_steps_per_second": 58.824, "step": 3800 }, { "epoch": 0.99, "learning_rate": 4.507389162561577e-05, "loss": 0.8409, "step": 4000 }, { "epoch": 0.99, "eval_accuracy": 0.6334958672523499, "eval_loss": 0.8270503878593445, "eval_runtime": 13.9849, "eval_samples_per_second": 469.221, "eval_steps_per_second": 58.706, "step": 4000 }, { "epoch": 1.03, "eval_accuracy": 0.6324291229248047, "eval_loss": 0.8238465785980225, "eval_runtime": 13.986, "eval_samples_per_second": 469.184, "eval_steps_per_second": 58.702, "step": 4200 }, { "epoch": 1.08, "eval_accuracy": 0.6322767734527588, "eval_loss": 0.8234522342681885, "eval_runtime": 13.9817, "eval_samples_per_second": 469.329, "eval_steps_per_second": 58.72, "step": 4400 }, { "epoch": 1.11, "learning_rate": 4.4458128078817734e-05, "loss": 0.7883, "step": 4500 }, { "epoch": 1.13, "eval_accuracy": 0.6405059695243835, "eval_loss": 0.8090473413467407, "eval_runtime": 13.9835, "eval_samples_per_second": 469.267, "eval_steps_per_second": 58.712, "step": 4600 }, { "epoch": 1.18, "eval_accuracy": 0.635019838809967, "eval_loss": 0.8067367076873779, "eval_runtime": 13.9898, "eval_samples_per_second": 469.056, "eval_steps_per_second": 58.686, "step": 4800 }, { "epoch": 1.23, "learning_rate": 4.384236453201971e-05, "loss": 0.7789, "step": 5000 }, { "epoch": 1.23, "eval_accuracy": 0.6330386996269226, "eval_loss": 0.8147866129875183, "eval_runtime": 13.941, "eval_samples_per_second": 470.699, "eval_steps_per_second": 58.891, "step": 5000 }, { "epoch": 1.28, "eval_accuracy": 0.6482779383659363, "eval_loss": 0.7999472618103027, "eval_runtime": 13.8888, "eval_samples_per_second": 472.466, "eval_steps_per_second": 59.112, "step": 5200 }, { "epoch": 1.33, "eval_accuracy": 0.6427918076515198, "eval_loss": 0.8316212296485901, "eval_runtime": 13.8743, "eval_samples_per_second": 472.96, "eval_steps_per_second": 59.174, "step": 5400 }, { "epoch": 1.35, "learning_rate": 4.3226600985221674e-05, "loss": 0.7644, "step": 5500 }, { "epoch": 1.38, "eval_accuracy": 0.6462968587875366, "eval_loss": 0.8003305196762085, "eval_runtime": 13.8974, "eval_samples_per_second": 472.173, "eval_steps_per_second": 59.076, "step": 5600 }, { "epoch": 1.43, "eval_accuracy": 0.6531545519828796, "eval_loss": 0.7893626689910889, "eval_runtime": 13.9226, "eval_samples_per_second": 471.319, "eval_steps_per_second": 58.969, "step": 5800 }, { "epoch": 1.48, "learning_rate": 4.261083743842365e-05, "loss": 0.7648, "step": 6000 }, { "epoch": 1.48, "eval_accuracy": 0.6549832224845886, "eval_loss": 0.7921696305274963, "eval_runtime": 13.9392, "eval_samples_per_second": 470.76, "eval_steps_per_second": 58.899, "step": 6000 }, { "epoch": 1.53, "eval_accuracy": 0.6469064354896545, "eval_loss": 0.8024189472198486, "eval_runtime": 13.9187, "eval_samples_per_second": 471.45, "eval_steps_per_second": 58.985, "step": 6200 }, { "epoch": 1.58, "eval_accuracy": 0.6566595435142517, "eval_loss": 0.7816490530967712, "eval_runtime": 13.9487, "eval_samples_per_second": 470.437, "eval_steps_per_second": 58.858, "step": 6400 }, { "epoch": 1.6, "learning_rate": 4.199507389162562e-05, "loss": 0.765, "step": 6500 }, { "epoch": 1.63, "eval_accuracy": 0.6543736457824707, "eval_loss": 0.7835971117019653, "eval_runtime": 13.8588, "eval_samples_per_second": 473.491, "eval_steps_per_second": 59.24, "step": 6600 }, { "epoch": 1.67, "eval_accuracy": 0.6411154866218567, "eval_loss": 0.8067611455917358, "eval_runtime": 13.8687, "eval_samples_per_second": 473.153, "eval_steps_per_second": 59.198, "step": 6800 }, { "epoch": 1.72, "learning_rate": 4.1379310344827587e-05, "loss": 0.7583, "step": 7000 }, { "epoch": 1.72, "eval_accuracy": 0.6603170037269592, "eval_loss": 0.7717912197113037, "eval_runtime": 13.8664, "eval_samples_per_second": 473.231, "eval_steps_per_second": 59.208, "step": 7000 }, { "epoch": 1.77, "eval_accuracy": 0.6635172367095947, "eval_loss": 0.7803527116775513, "eval_runtime": 13.8557, "eval_samples_per_second": 473.596, "eval_steps_per_second": 59.254, "step": 7200 }, { "epoch": 1.82, "eval_accuracy": 0.6674794554710388, "eval_loss": 0.7677510380744934, "eval_runtime": 13.8768, "eval_samples_per_second": 472.875, "eval_steps_per_second": 59.163, "step": 7400 }, { "epoch": 1.85, "learning_rate": 4.076354679802955e-05, "loss": 0.7584, "step": 7500 }, { "epoch": 1.87, "eval_accuracy": 0.6589454412460327, "eval_loss": 0.7867729067802429, "eval_runtime": 13.9787, "eval_samples_per_second": 469.43, "eval_steps_per_second": 58.732, "step": 7600 }, { "epoch": 1.92, "eval_accuracy": 0.6604693531990051, "eval_loss": 0.7763269543647766, "eval_runtime": 13.9708, "eval_samples_per_second": 469.693, "eval_steps_per_second": 58.765, "step": 7800 }, { "epoch": 1.97, "learning_rate": 4.014778325123153e-05, "loss": 0.7515, "step": 8000 }, { "epoch": 1.97, "eval_accuracy": 0.6673270463943481, "eval_loss": 0.7527127265930176, "eval_runtime": 13.9175, "eval_samples_per_second": 471.491, "eval_steps_per_second": 58.99, "step": 8000 }, { "epoch": 2.02, "eval_accuracy": 0.6624504923820496, "eval_loss": 0.8022358417510986, "eval_runtime": 13.9295, "eval_samples_per_second": 471.086, "eval_steps_per_second": 58.94, "step": 8200 }, { "epoch": 2.07, "eval_accuracy": 0.6630600690841675, "eval_loss": 0.7974384427070618, "eval_runtime": 13.9703, "eval_samples_per_second": 469.711, "eval_steps_per_second": 58.768, "step": 8400 }, { "epoch": 2.09, "learning_rate": 3.95320197044335e-05, "loss": 0.6779, "step": 8500 }, { "epoch": 2.12, "eval_accuracy": 0.6708320379257202, "eval_loss": 0.768686830997467, "eval_runtime": 13.9807, "eval_samples_per_second": 469.361, "eval_steps_per_second": 58.724, "step": 8600 }, { "epoch": 2.17, "eval_accuracy": 0.6661078929901123, "eval_loss": 0.7770901322364807, "eval_runtime": 13.9743, "eval_samples_per_second": 469.578, "eval_steps_per_second": 58.751, "step": 8800 }, { "epoch": 2.22, "learning_rate": 3.891625615763547e-05, "loss": 0.6587, "step": 9000 }, { "epoch": 2.22, "eval_accuracy": 0.6674794554710388, "eval_loss": 0.7751796245574951, "eval_runtime": 13.9722, "eval_samples_per_second": 469.647, "eval_steps_per_second": 58.76, "step": 9000 }, { "epoch": 2.27, "eval_accuracy": 0.6688509583473206, "eval_loss": 0.7814744114875793, "eval_runtime": 13.9424, "eval_samples_per_second": 470.652, "eval_steps_per_second": 58.885, "step": 9200 }, { "epoch": 2.32, "eval_accuracy": 0.6693081259727478, "eval_loss": 0.7871124744415283, "eval_runtime": 13.9317, "eval_samples_per_second": 471.012, "eval_steps_per_second": 58.93, "step": 9400 }, { "epoch": 2.34, "learning_rate": 3.830049261083744e-05, "loss": 0.6662, "step": 9500 }, { "epoch": 2.36, "eval_accuracy": 0.6717464327812195, "eval_loss": 0.768136203289032, "eval_runtime": 13.9811, "eval_samples_per_second": 469.347, "eval_steps_per_second": 58.722, "step": 9600 }, { "epoch": 2.41, "eval_accuracy": 0.6755562424659729, "eval_loss": 0.7916134595870972, "eval_runtime": 13.9048, "eval_samples_per_second": 471.925, "eval_steps_per_second": 59.045, "step": 9800 }, { "epoch": 2.46, "learning_rate": 3.768472906403941e-05, "loss": 0.6585, "step": 10000 }, { "epoch": 2.46, "eval_accuracy": 0.6796708106994629, "eval_loss": 0.7765262722969055, "eval_runtime": 13.8808, "eval_samples_per_second": 472.74, "eval_steps_per_second": 59.146, "step": 10000 }, { "epoch": 2.51, "eval_accuracy": 0.6746419072151184, "eval_loss": 0.7696249485015869, "eval_runtime": 13.8943, "eval_samples_per_second": 472.28, "eval_steps_per_second": 59.089, "step": 10200 }, { "epoch": 2.56, "eval_accuracy": 0.6705272793769836, "eval_loss": 0.795970618724823, "eval_runtime": 13.923, "eval_samples_per_second": 471.308, "eval_steps_per_second": 58.967, "step": 10400 }, { "epoch": 2.59, "learning_rate": 3.7068965517241385e-05, "loss": 0.6648, "step": 10500 }, { "epoch": 2.61, "eval_accuracy": 0.681347131729126, "eval_loss": 0.7532095909118652, "eval_runtime": 13.9308, "eval_samples_per_second": 471.042, "eval_steps_per_second": 58.934, "step": 10600 }, { "epoch": 2.66, "eval_accuracy": 0.6834806203842163, "eval_loss": 0.7632550001144409, "eval_runtime": 13.9049, "eval_samples_per_second": 471.921, "eval_steps_per_second": 59.044, "step": 10800 }, { "epoch": 2.71, "learning_rate": 3.645320197044335e-05, "loss": 0.6663, "step": 11000 }, { "epoch": 2.71, "eval_accuracy": 0.6773849725723267, "eval_loss": 0.7755422592163086, "eval_runtime": 13.8928, "eval_samples_per_second": 472.331, "eval_steps_per_second": 59.095, "step": 11000 }, { "epoch": 2.76, "eval_accuracy": 0.6784517168998718, "eval_loss": 0.7512595653533936, "eval_runtime": 13.8668, "eval_samples_per_second": 473.218, "eval_steps_per_second": 59.206, "step": 11200 }, { "epoch": 2.81, "eval_accuracy": 0.6869856715202332, "eval_loss": 0.7553817629814148, "eval_runtime": 13.8821, "eval_samples_per_second": 472.693, "eval_steps_per_second": 59.141, "step": 11400 }, { "epoch": 2.83, "learning_rate": 3.583743842364532e-05, "loss": 0.6645, "step": 11500 }, { "epoch": 2.86, "eval_accuracy": 0.6833282709121704, "eval_loss": 0.7605084776878357, "eval_runtime": 13.8864, "eval_samples_per_second": 472.548, "eval_steps_per_second": 59.122, "step": 11600 }, { "epoch": 2.91, "eval_accuracy": 0.681347131729126, "eval_loss": 0.7520666718482971, "eval_runtime": 13.9251, "eval_samples_per_second": 471.236, "eval_steps_per_second": 58.958, "step": 11800 }, { "epoch": 2.96, "learning_rate": 3.522167487684729e-05, "loss": 0.6596, "step": 12000 }, { "epoch": 2.96, "eval_accuracy": 0.6738799214363098, "eval_loss": 0.7592176795005798, "eval_runtime": 13.9705, "eval_samples_per_second": 469.705, "eval_steps_per_second": 58.767, "step": 12000 }, { "epoch": 3.0, "eval_accuracy": 0.6863760948181152, "eval_loss": 0.7859818935394287, "eval_runtime": 13.9639, "eval_samples_per_second": 469.927, "eval_steps_per_second": 58.795, "step": 12200 }, { "epoch": 3.05, "eval_accuracy": 0.6819567084312439, "eval_loss": 0.7805635929107666, "eval_runtime": 13.8834, "eval_samples_per_second": 472.651, "eval_steps_per_second": 59.135, "step": 12400 }, { "epoch": 3.08, "learning_rate": 3.4605911330049265e-05, "loss": 0.5974, "step": 12500 }, { "epoch": 3.1, "eval_accuracy": 0.6827186942100525, "eval_loss": 0.8164608478546143, "eval_runtime": 13.879, "eval_samples_per_second": 472.8, "eval_steps_per_second": 59.154, "step": 12600 }, { "epoch": 3.15, "eval_accuracy": 0.6831758618354797, "eval_loss": 0.7926530838012695, "eval_runtime": 13.8407, "eval_samples_per_second": 474.109, "eval_steps_per_second": 59.318, "step": 12800 }, { "epoch": 3.2, "learning_rate": 3.399014778325123e-05, "loss": 0.5539, "step": 13000 }, { "epoch": 3.2, "eval_accuracy": 0.6781468987464905, "eval_loss": 0.8301470279693604, "eval_runtime": 13.8501, "eval_samples_per_second": 473.786, "eval_steps_per_second": 59.277, "step": 13000 }, { "epoch": 3.25, "eval_accuracy": 0.6764705777168274, "eval_loss": 0.8108323812484741, "eval_runtime": 13.8741, "eval_samples_per_second": 472.969, "eval_steps_per_second": 59.175, "step": 13200 }, { "epoch": 3.3, "eval_accuracy": 0.6892715692520142, "eval_loss": 0.8093796968460083, "eval_runtime": 13.8736, "eval_samples_per_second": 472.983, "eval_steps_per_second": 59.177, "step": 13400 }, { "epoch": 3.33, "learning_rate": 3.3374384236453204e-05, "loss": 0.5556, "step": 13500 }, { "epoch": 3.35, "eval_accuracy": 0.6766229867935181, "eval_loss": 0.8061802983283997, "eval_runtime": 13.8439, "eval_samples_per_second": 474.0, "eval_steps_per_second": 59.304, "step": 13600 }, { "epoch": 3.4, "eval_accuracy": 0.6857665181159973, "eval_loss": 0.8102853894233704, "eval_runtime": 13.8952, "eval_samples_per_second": 472.25, "eval_steps_per_second": 59.085, "step": 13800 }, { "epoch": 3.45, "learning_rate": 3.275862068965517e-05, "loss": 0.561, "step": 14000 }, { "epoch": 3.45, "eval_accuracy": 0.6811947822570801, "eval_loss": 0.7732057571411133, "eval_runtime": 13.9222, "eval_samples_per_second": 471.335, "eval_steps_per_second": 58.971, "step": 14000 }, { "epoch": 3.5, "eval_accuracy": 0.6784517168998718, "eval_loss": 0.8165723085403442, "eval_runtime": 13.9595, "eval_samples_per_second": 470.073, "eval_steps_per_second": 58.813, "step": 14200 }, { "epoch": 3.55, "eval_accuracy": 0.6802803874015808, "eval_loss": 0.807296097278595, "eval_runtime": 14.0128, "eval_samples_per_second": 468.288, "eval_steps_per_second": 58.589, "step": 14400 }, { "epoch": 3.57, "learning_rate": 3.2142857142857144e-05, "loss": 0.5708, "step": 14500 }, { "epoch": 3.6, "eval_accuracy": 0.6888144016265869, "eval_loss": 0.796098530292511, "eval_runtime": 13.9315, "eval_samples_per_second": 471.019, "eval_steps_per_second": 58.931, "step": 14600 }, { "epoch": 3.65, "eval_accuracy": 0.6831758618354797, "eval_loss": 0.7848635315895081, "eval_runtime": 13.9748, "eval_samples_per_second": 469.561, "eval_steps_per_second": 58.749, "step": 14800 }, { "epoch": 3.69, "learning_rate": 3.152709359605912e-05, "loss": 0.5664, "step": 15000 }, { "epoch": 3.69, "eval_accuracy": 0.6843950152397156, "eval_loss": 0.8060910105705261, "eval_runtime": 13.9727, "eval_samples_per_second": 469.631, "eval_steps_per_second": 58.758, "step": 15000 }, { "epoch": 3.74, "eval_accuracy": 0.6804327964782715, "eval_loss": 0.7997169494628906, "eval_runtime": 13.9901, "eval_samples_per_second": 469.045, "eval_steps_per_second": 58.684, "step": 15200 }, { "epoch": 3.79, "eval_accuracy": 0.6877476572990417, "eval_loss": 0.7792339324951172, "eval_runtime": 13.9337, "eval_samples_per_second": 470.946, "eval_steps_per_second": 58.922, "step": 15400 }, { "epoch": 3.79, "step": 15400, "total_flos": 9.026901710390784e+16, "train_loss": 0.7314797597117239, "train_runtime": 6341.8601, "train_samples_per_second": 409.627, "train_steps_per_second": 6.402 } ], "max_steps": 40600, "num_train_epochs": 10, "total_flos": 9.026901710390784e+16, "trial_name": null, "trial_params": null }