{ "best_metric": 0.6948868632316589, "best_model_checkpoint": "/output/zgt-roberta-base-finetuned-Gu21schedule-BS256-10ep/checkpoint-31926", "epoch": 9.999029597282872, "global_step": 32200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "eval_accuracy": 0.15792088093844922, "eval_loss": 6.227477550506592, "eval_runtime": 1564.1073, "eval_samples_per_second": 65.972, "eval_steps_per_second": 4.124, "step": 313 }, { "epoch": 0.16, "learning_rate": 9.316770186335403e-05, "loss": 7.1011, "step": 500 }, { "epoch": 0.19, "eval_accuracy": 0.17130737364600243, "eval_loss": 6.035614967346191, "eval_runtime": 1566.1128, "eval_samples_per_second": 65.887, "eval_steps_per_second": 4.118, "step": 626 }, { "epoch": 0.29, "eval_accuracy": 0.15421966595333422, "eval_loss": 5.970716953277588, "eval_runtime": 1567.0923, "eval_samples_per_second": 65.846, "eval_steps_per_second": 4.116, "step": 939 }, { "epoch": 0.31, "learning_rate": 0.00018633540372670805, "loss": 6.0182, "step": 1000 }, { "epoch": 0.39, "eval_accuracy": 0.17392094665213684, "eval_loss": 5.876271724700928, "eval_runtime": 1568.897, "eval_samples_per_second": 65.77, "eval_steps_per_second": 4.111, "step": 1252 }, { "epoch": 0.47, "learning_rate": 0.0002795031055900621, "loss": 5.8908, "step": 1500 }, { "epoch": 0.49, "eval_accuracy": 0.17560553688955177, "eval_loss": 5.8265557289123535, "eval_runtime": 1564.0767, "eval_samples_per_second": 65.973, "eval_steps_per_second": 4.124, "step": 1565 }, { "epoch": 0.58, "eval_accuracy": 0.1755758633601614, "eval_loss": 5.785298824310303, "eval_runtime": 1563.8448, "eval_samples_per_second": 65.983, "eval_steps_per_second": 4.124, "step": 1878 }, { "epoch": 0.62, "learning_rate": 0.0003726708074534161, "loss": 5.8105, "step": 2000 }, { "epoch": 0.68, "eval_accuracy": 0.1851031730080446, "eval_loss": 5.230679988861084, "eval_runtime": 1563.8141, "eval_samples_per_second": 65.984, "eval_steps_per_second": 4.125, "step": 2191 }, { "epoch": 0.78, "learning_rate": 0.0004658385093167702, "loss": 4.8469, "step": 2500 }, { "epoch": 0.78, "eval_accuracy": 0.49302250475931464, "eval_loss": 2.994337320327759, "eval_runtime": 1564.1944, "eval_samples_per_second": 65.968, "eval_steps_per_second": 4.124, "step": 2504 }, { "epoch": 0.87, "eval_accuracy": 0.6404133912717029, "eval_loss": 1.8836123943328857, "eval_runtime": 1565.3247, "eval_samples_per_second": 65.921, "eval_steps_per_second": 4.121, "step": 2817 }, { "epoch": 0.93, "learning_rate": 0.0005590062111801242, "loss": 2.2431, "step": 3000 }, { "epoch": 0.97, "eval_accuracy": 0.6801897739457411, "eval_loss": 1.6012158393859863, "eval_runtime": 1567.0654, "eval_samples_per_second": 65.847, "eval_steps_per_second": 4.116, "step": 3130 }, { "epoch": 1.07, "eval_accuracy": 0.7009936840521221, "eval_loss": 1.4589687585830688, "eval_runtime": 1567.9702, "eval_samples_per_second": 65.809, "eval_steps_per_second": 4.114, "step": 3443 }, { "epoch": 1.09, "learning_rate": 0.0005942028985507245, "loss": 1.618, "step": 3500 }, { "epoch": 1.17, "eval_accuracy": 0.7143850078266079, "eval_loss": 1.3708570003509521, "eval_runtime": 1567.3868, "eval_samples_per_second": 65.834, "eval_steps_per_second": 4.115, "step": 3756 }, { "epoch": 1.24, "learning_rate": 0.0005838509316770186, "loss": 1.436, "step": 4000 }, { "epoch": 1.26, "eval_accuracy": 0.725278557944805, "eval_loss": 1.2998367547988892, "eval_runtime": 1564.9997, "eval_samples_per_second": 65.934, "eval_steps_per_second": 4.121, "step": 4069 }, { "epoch": 1.36, "eval_accuracy": 0.7341513279268731, "eval_loss": 1.2506020069122314, "eval_runtime": 1565.2355, "eval_samples_per_second": 65.924, "eval_steps_per_second": 4.121, "step": 4382 }, { "epoch": 1.4, "learning_rate": 0.0005734989648033125, "loss": 1.3344, "step": 4500 }, { "epoch": 1.46, "eval_accuracy": 0.7414229730121367, "eval_loss": 1.2084842920303345, "eval_runtime": 1564.6672, "eval_samples_per_second": 65.948, "eval_steps_per_second": 4.122, "step": 4695 }, { "epoch": 1.55, "learning_rate": 0.0005631469979296066, "loss": 1.2678, "step": 5000 }, { "epoch": 1.56, "eval_accuracy": 0.7460624134488423, "eval_loss": 1.1799527406692505, "eval_runtime": 1565.6909, "eval_samples_per_second": 65.905, "eval_steps_per_second": 4.12, "step": 5008 }, { "epoch": 1.65, "eval_accuracy": 0.7513162543657942, "eval_loss": 1.1497138738632202, "eval_runtime": 1565.9769, "eval_samples_per_second": 65.893, "eval_steps_per_second": 4.119, "step": 5321 }, { "epoch": 1.71, "learning_rate": 0.0005527950310559006, "loss": 1.2183, "step": 5500 }, { "epoch": 1.75, "eval_accuracy": 0.7555471802775164, "eval_loss": 1.1244266033172607, "eval_runtime": 1565.313, "eval_samples_per_second": 65.921, "eval_steps_per_second": 4.121, "step": 5634 }, { "epoch": 1.85, "eval_accuracy": 0.7586796535932161, "eval_loss": 1.1065826416015625, "eval_runtime": 1565.5955, "eval_samples_per_second": 65.909, "eval_steps_per_second": 4.12, "step": 5947 }, { "epoch": 1.86, "learning_rate": 0.0005424430641821946, "loss": 1.1764, "step": 6000 }, { "epoch": 1.94, "eval_accuracy": 0.7621753158952852, "eval_loss": 1.0877457857131958, "eval_runtime": 1565.477, "eval_samples_per_second": 65.914, "eval_steps_per_second": 4.12, "step": 6260 }, { "epoch": 2.02, "learning_rate": 0.0005320910973084886, "loss": 1.1481, "step": 6500 }, { "epoch": 2.04, "eval_accuracy": 0.7653308256390273, "eval_loss": 1.069921851158142, "eval_runtime": 1565.5751, "eval_samples_per_second": 65.91, "eval_steps_per_second": 4.12, "step": 6573 }, { "epoch": 2.14, "eval_accuracy": 0.7669783269978627, "eval_loss": 1.059142827987671, "eval_runtime": 1564.7195, "eval_samples_per_second": 65.946, "eval_steps_per_second": 4.122, "step": 6886 }, { "epoch": 2.17, "learning_rate": 0.0005217391304347826, "loss": 1.1169, "step": 7000 }, { "epoch": 2.24, "eval_accuracy": 0.7701243077960133, "eval_loss": 1.0415377616882324, "eval_runtime": 1565.3407, "eval_samples_per_second": 65.92, "eval_steps_per_second": 4.121, "step": 7199 }, { "epoch": 2.33, "learning_rate": 0.0005113871635610765, "loss": 1.0953, "step": 7500 }, { "epoch": 2.33, "eval_accuracy": 0.7718881473585557, "eval_loss": 1.0299146175384521, "eval_runtime": 1565.1494, "eval_samples_per_second": 65.928, "eval_steps_per_second": 4.121, "step": 7512 }, { "epoch": 2.43, "eval_accuracy": 0.7742740416938212, "eval_loss": 1.0169405937194824, "eval_runtime": 1565.1719, "eval_samples_per_second": 65.927, "eval_steps_per_second": 4.121, "step": 7825 }, { "epoch": 2.48, "learning_rate": 0.0005010351966873705, "loss": 1.0743, "step": 8000 }, { "epoch": 2.53, "eval_accuracy": 0.7762836729268914, "eval_loss": 1.006744146347046, "eval_runtime": 1565.0712, "eval_samples_per_second": 65.931, "eval_steps_per_second": 4.121, "step": 8138 }, { "epoch": 2.62, "eval_accuracy": 0.7780289191145969, "eval_loss": 0.9963796734809875, "eval_runtime": 1564.7218, "eval_samples_per_second": 65.946, "eval_steps_per_second": 4.122, "step": 8451 }, { "epoch": 2.64, "learning_rate": 0.0004906832298136645, "loss": 1.0582, "step": 8500 }, { "epoch": 2.72, "eval_accuracy": 0.7799966088129119, "eval_loss": 0.9861342310905457, "eval_runtime": 1564.6716, "eval_samples_per_second": 65.948, "eval_steps_per_second": 4.122, "step": 8764 }, { "epoch": 2.79, "learning_rate": 0.00048033126293995856, "loss": 1.0401, "step": 9000 }, { "epoch": 2.82, "eval_accuracy": 0.7818533899689635, "eval_loss": 0.9751449227333069, "eval_runtime": 1564.9804, "eval_samples_per_second": 65.935, "eval_steps_per_second": 4.121, "step": 9077 }, { "epoch": 2.92, "eval_accuracy": 0.7830731640909905, "eval_loss": 0.9680520296096802, "eval_runtime": 1565.4431, "eval_samples_per_second": 65.916, "eval_steps_per_second": 4.12, "step": 9390 }, { "epoch": 2.95, "learning_rate": 0.00046997929606625255, "loss": 1.0262, "step": 9500 }, { "epoch": 3.01, "eval_accuracy": 0.7843024373004699, "eval_loss": 0.9606424570083618, "eval_runtime": 1565.1725, "eval_samples_per_second": 65.927, "eval_steps_per_second": 4.121, "step": 9703 }, { "epoch": 3.11, "learning_rate": 0.0004596273291925466, "loss": 1.0093, "step": 10000 }, { "epoch": 3.11, "eval_accuracy": 0.7858115501363246, "eval_loss": 0.9535852670669556, "eval_runtime": 1564.8587, "eval_samples_per_second": 65.94, "eval_steps_per_second": 4.122, "step": 10016 }, { "epoch": 3.21, "eval_accuracy": 0.7874158219284931, "eval_loss": 0.9444663524627686, "eval_runtime": 1564.7558, "eval_samples_per_second": 65.944, "eval_steps_per_second": 4.122, "step": 10329 }, { "epoch": 3.26, "learning_rate": 0.0004492753623188405, "loss": 0.995, "step": 10500 }, { "epoch": 3.3, "eval_accuracy": 0.7884454282264995, "eval_loss": 0.9367947578430176, "eval_runtime": 1565.5731, "eval_samples_per_second": 65.91, "eval_steps_per_second": 4.12, "step": 10642 }, { "epoch": 3.4, "eval_accuracy": 0.7902011668190658, "eval_loss": 0.9284105896949768, "eval_runtime": 1564.8586, "eval_samples_per_second": 65.94, "eval_steps_per_second": 4.122, "step": 10955 }, { "epoch": 3.42, "learning_rate": 0.0004389233954451345, "loss": 0.983, "step": 11000 }, { "epoch": 3.5, "eval_accuracy": 0.7912236011775762, "eval_loss": 0.9227182269096375, "eval_runtime": 1564.9848, "eval_samples_per_second": 65.935, "eval_steps_per_second": 4.121, "step": 11268 }, { "epoch": 3.57, "learning_rate": 0.00042857142857142855, "loss": 0.9728, "step": 11500 }, { "epoch": 3.6, "eval_accuracy": 0.7926902278847275, "eval_loss": 0.9144140481948853, "eval_runtime": 1564.605, "eval_samples_per_second": 65.951, "eval_steps_per_second": 4.122, "step": 11581 }, { "epoch": 3.69, "eval_accuracy": 0.7936642140619866, "eval_loss": 0.9096552133560181, "eval_runtime": 1565.1262, "eval_samples_per_second": 65.929, "eval_steps_per_second": 4.121, "step": 11894 }, { "epoch": 3.73, "learning_rate": 0.00041821946169772254, "loss": 0.9632, "step": 12000 }, { "epoch": 3.79, "eval_accuracy": 0.7947595817774112, "eval_loss": 0.9028408527374268, "eval_runtime": 1564.7786, "eval_samples_per_second": 65.944, "eval_steps_per_second": 4.122, "step": 12207 }, { "epoch": 3.88, "learning_rate": 0.0004078674948240166, "loss": 0.953, "step": 12500 }, { "epoch": 3.89, "eval_accuracy": 0.7958614481801236, "eval_loss": 0.896992564201355, "eval_runtime": 1564.1136, "eval_samples_per_second": 65.972, "eval_steps_per_second": 4.124, "step": 12520 }, { "epoch": 3.99, "eval_accuracy": 0.796848649990296, "eval_loss": 0.8908724188804626, "eval_runtime": 1564.2284, "eval_samples_per_second": 65.967, "eval_steps_per_second": 4.123, "step": 12833 }, { "epoch": 4.04, "learning_rate": 0.0003975155279503105, "loss": 0.9414, "step": 13000 }, { "epoch": 4.08, "eval_accuracy": 0.797744672086327, "eval_loss": 0.8873794674873352, "eval_runtime": 1565.739, "eval_samples_per_second": 65.903, "eval_steps_per_second": 4.119, "step": 13146 }, { "epoch": 4.18, "eval_accuracy": 0.7986036425092956, "eval_loss": 0.881266713142395, "eval_runtime": 1565.4321, "eval_samples_per_second": 65.916, "eval_steps_per_second": 4.12, "step": 13459 }, { "epoch": 4.19, "learning_rate": 0.0003871635610766045, "loss": 0.9287, "step": 13500 }, { "epoch": 4.28, "eval_accuracy": 0.7999304297565917, "eval_loss": 0.8743026852607727, "eval_runtime": 1564.903, "eval_samples_per_second": 65.938, "eval_steps_per_second": 4.122, "step": 13772 }, { "epoch": 4.35, "learning_rate": 0.00037681159420289854, "loss": 0.9219, "step": 14000 }, { "epoch": 4.37, "eval_accuracy": 0.80051802299604, "eval_loss": 0.8717477917671204, "eval_runtime": 1565.3161, "eval_samples_per_second": 65.921, "eval_steps_per_second": 4.121, "step": 14085 }, { "epoch": 4.47, "eval_accuracy": 0.801263244223999, "eval_loss": 0.8662565350532532, "eval_runtime": 1564.6984, "eval_samples_per_second": 65.947, "eval_steps_per_second": 4.122, "step": 14398 }, { "epoch": 4.5, "learning_rate": 0.0003664596273291925, "loss": 0.9137, "step": 14500 }, { "epoch": 4.57, "eval_accuracy": 0.8024215648096065, "eval_loss": 0.8607039451599121, "eval_runtime": 1565.2999, "eval_samples_per_second": 65.922, "eval_steps_per_second": 4.121, "step": 14711 }, { "epoch": 4.66, "learning_rate": 0.0003561076604554865, "loss": 0.9062, "step": 15000 }, { "epoch": 4.67, "eval_accuracy": 0.80312043845701, "eval_loss": 0.8565826416015625, "eval_runtime": 1566.6554, "eval_samples_per_second": 65.865, "eval_steps_per_second": 4.117, "step": 15024 }, { "epoch": 4.76, "eval_accuracy": 0.8044941042086672, "eval_loss": 0.8497495055198669, "eval_runtime": 1566.3382, "eval_samples_per_second": 65.878, "eval_steps_per_second": 4.118, "step": 15337 }, { "epoch": 4.81, "learning_rate": 0.0003457556935817805, "loss": 0.8978, "step": 15500 }, { "epoch": 4.86, "eval_accuracy": 0.8053608800858211, "eval_loss": 0.8438310623168945, "eval_runtime": 1565.4433, "eval_samples_per_second": 65.916, "eval_steps_per_second": 4.12, "step": 15650 }, { "epoch": 4.96, "eval_accuracy": 0.8064097453094475, "eval_loss": 0.8402115702629089, "eval_runtime": 1565.8792, "eval_samples_per_second": 65.897, "eval_steps_per_second": 4.119, "step": 15963 }, { "epoch": 4.97, "learning_rate": 0.0003354037267080745, "loss": 0.8907, "step": 16000 }, { "epoch": 5.05, "eval_accuracy": 0.8072085264677659, "eval_loss": 0.8345613479614258, "eval_runtime": 1565.4826, "eval_samples_per_second": 65.914, "eval_steps_per_second": 4.12, "step": 16276 }, { "epoch": 5.12, "learning_rate": 0.0003250517598343685, "loss": 0.8791, "step": 16500 }, { "epoch": 5.15, "eval_accuracy": 0.8079710854147522, "eval_loss": 0.8311530947685242, "eval_runtime": 1565.3772, "eval_samples_per_second": 65.918, "eval_steps_per_second": 4.12, "step": 16589 }, { "epoch": 5.25, "eval_accuracy": 0.808113472692542, "eval_loss": 0.8312503695487976, "eval_runtime": 1564.9419, "eval_samples_per_second": 65.937, "eval_steps_per_second": 4.122, "step": 16902 }, { "epoch": 5.28, "learning_rate": 0.00031469979296066245, "loss": 0.8713, "step": 17000 }, { "epoch": 5.35, "eval_accuracy": 0.8089870028151737, "eval_loss": 0.8246920108795166, "eval_runtime": 1566.1653, "eval_samples_per_second": 65.885, "eval_steps_per_second": 4.118, "step": 17215 }, { "epoch": 5.43, "learning_rate": 0.0003043478260869565, "loss": 0.866, "step": 17500 }, { "epoch": 5.44, "eval_accuracy": 0.8100817167380647, "eval_loss": 0.8203573822975159, "eval_runtime": 1563.7507, "eval_samples_per_second": 65.987, "eval_steps_per_second": 4.125, "step": 17528 }, { "epoch": 5.54, "eval_accuracy": 0.8106388454645975, "eval_loss": 0.8163631558418274, "eval_runtime": 1563.2024, "eval_samples_per_second": 66.01, "eval_steps_per_second": 4.126, "step": 17841 }, { "epoch": 5.59, "learning_rate": 0.0002939958592132505, "loss": 0.8607, "step": 18000 }, { "epoch": 5.64, "eval_accuracy": 0.8114071774300123, "eval_loss": 0.8129469752311707, "eval_runtime": 1563.1261, "eval_samples_per_second": 66.013, "eval_steps_per_second": 4.126, "step": 18154 }, { "epoch": 5.73, "eval_accuracy": 0.8122062364179543, "eval_loss": 0.808627724647522, "eval_runtime": 1564.3551, "eval_samples_per_second": 65.961, "eval_steps_per_second": 4.123, "step": 18467 }, { "epoch": 5.74, "learning_rate": 0.00028364389233954447, "loss": 0.8534, "step": 18500 }, { "epoch": 5.83, "eval_accuracy": 0.8128903362394657, "eval_loss": 0.8055641651153564, "eval_runtime": 1563.7512, "eval_samples_per_second": 65.987, "eval_steps_per_second": 4.125, "step": 18780 }, { "epoch": 5.9, "learning_rate": 0.0002732919254658385, "loss": 0.8448, "step": 19000 }, { "epoch": 5.93, "eval_accuracy": 0.8134494722363886, "eval_loss": 0.8019941449165344, "eval_runtime": 1563.2984, "eval_samples_per_second": 66.006, "eval_steps_per_second": 4.126, "step": 19093 }, { "epoch": 6.03, "eval_accuracy": 0.8143192139194421, "eval_loss": 0.7982867956161499, "eval_runtime": 1563.6693, "eval_samples_per_second": 65.99, "eval_steps_per_second": 4.125, "step": 19406 }, { "epoch": 6.06, "learning_rate": 0.0002629399585921325, "loss": 0.8391, "step": 19500 }, { "epoch": 6.12, "eval_accuracy": 0.8147886522353383, "eval_loss": 0.7949500679969788, "eval_runtime": 1563.7961, "eval_samples_per_second": 65.985, "eval_steps_per_second": 4.125, "step": 19719 }, { "epoch": 6.21, "learning_rate": 0.0002525879917184265, "loss": 0.8304, "step": 20000 }, { "epoch": 6.22, "eval_accuracy": 0.8155781682195781, "eval_loss": 0.7908634543418884, "eval_runtime": 1563.2059, "eval_samples_per_second": 66.01, "eval_steps_per_second": 4.126, "step": 20032 }, { "epoch": 6.32, "eval_accuracy": 0.816149684044946, "eval_loss": 0.7888238430023193, "eval_runtime": 1565.7848, "eval_samples_per_second": 65.901, "eval_steps_per_second": 4.119, "step": 20345 }, { "epoch": 6.37, "learning_rate": 0.00024223602484472047, "loss": 0.825, "step": 20500 }, { "epoch": 6.41, "eval_accuracy": 0.8167946967773272, "eval_loss": 0.7858228087425232, "eval_runtime": 1565.9773, "eval_samples_per_second": 65.893, "eval_steps_per_second": 4.119, "step": 20658 }, { "epoch": 6.51, "eval_accuracy": 0.8174543786608812, "eval_loss": 0.7798792123794556, "eval_runtime": 1566.9273, "eval_samples_per_second": 65.853, "eval_steps_per_second": 4.116, "step": 20971 }, { "epoch": 6.52, "learning_rate": 0.00023188405797101448, "loss": 0.8198, "step": 21000 }, { "epoch": 6.61, "eval_accuracy": 0.818135156601779, "eval_loss": 0.7765618562698364, "eval_runtime": 1568.3727, "eval_samples_per_second": 65.792, "eval_steps_per_second": 4.113, "step": 21284 }, { "epoch": 6.68, "learning_rate": 0.00022153209109730847, "loss": 0.8139, "step": 21500 }, { "epoch": 6.71, "eval_accuracy": 0.8189553029778005, "eval_loss": 0.7735697627067566, "eval_runtime": 1567.9384, "eval_samples_per_second": 65.811, "eval_steps_per_second": 4.114, "step": 21597 }, { "epoch": 6.8, "eval_accuracy": 0.8195096365262311, "eval_loss": 0.7705960273742676, "eval_runtime": 1568.1878, "eval_samples_per_second": 65.8, "eval_steps_per_second": 4.113, "step": 21910 }, { "epoch": 6.83, "learning_rate": 0.00021118012422360248, "loss": 0.8094, "step": 22000 }, { "epoch": 6.9, "eval_accuracy": 0.8200746154831311, "eval_loss": 0.7673328518867493, "eval_runtime": 1567.3958, "eval_samples_per_second": 65.833, "eval_steps_per_second": 4.115, "step": 22223 }, { "epoch": 6.99, "learning_rate": 0.00020082815734989647, "loss": 0.8032, "step": 22500 }, { "epoch": 7.0, "eval_accuracy": 0.8205203976743998, "eval_loss": 0.764415979385376, "eval_runtime": 1567.2437, "eval_samples_per_second": 65.84, "eval_steps_per_second": 4.116, "step": 22536 }, { "epoch": 7.1, "eval_accuracy": 0.8215158231256271, "eval_loss": 0.7607036232948303, "eval_runtime": 1568.907, "eval_samples_per_second": 65.77, "eval_steps_per_second": 4.111, "step": 22849 }, { "epoch": 7.14, "learning_rate": 0.00019047619047619045, "loss": 0.794, "step": 23000 }, { "epoch": 7.19, "eval_accuracy": 0.8218634447758527, "eval_loss": 0.7578034996986389, "eval_runtime": 1562.4876, "eval_samples_per_second": 66.04, "eval_steps_per_second": 4.128, "step": 23162 }, { "epoch": 7.29, "eval_accuracy": 0.8225726120979496, "eval_loss": 0.7539456486701965, "eval_runtime": 1563.2377, "eval_samples_per_second": 66.009, "eval_steps_per_second": 4.126, "step": 23475 }, { "epoch": 7.3, "learning_rate": 0.00018012422360248444, "loss": 0.7891, "step": 23500 }, { "epoch": 7.39, "eval_accuracy": 0.8229123629611099, "eval_loss": 0.751589298248291, "eval_runtime": 1562.2621, "eval_samples_per_second": 66.05, "eval_steps_per_second": 4.129, "step": 23788 }, { "epoch": 7.45, "learning_rate": 0.00016977225672877845, "loss": 0.7854, "step": 24000 }, { "epoch": 7.48, "eval_accuracy": 0.8234190549813617, "eval_loss": 0.7492088079452515, "eval_runtime": 1562.6896, "eval_samples_per_second": 66.032, "eval_steps_per_second": 4.127, "step": 24101 }, { "epoch": 7.58, "eval_accuracy": 0.823868769101777, "eval_loss": 0.7464490532875061, "eval_runtime": 1563.2351, "eval_samples_per_second": 66.009, "eval_steps_per_second": 4.126, "step": 24414 }, { "epoch": 7.61, "learning_rate": 0.00015942028985507247, "loss": 0.7813, "step": 24500 }, { "epoch": 7.68, "eval_accuracy": 0.8245509114698386, "eval_loss": 0.7424126863479614, "eval_runtime": 1563.6491, "eval_samples_per_second": 65.991, "eval_steps_per_second": 4.125, "step": 24727 }, { "epoch": 7.76, "learning_rate": 0.00014906832298136645, "loss": 0.7757, "step": 25000 }, { "epoch": 7.78, "eval_accuracy": 0.8252597131033751, "eval_loss": 0.7401213049888611, "eval_runtime": 1563.1841, "eval_samples_per_second": 66.011, "eval_steps_per_second": 4.126, "step": 25040 }, { "epoch": 7.87, "eval_accuracy": 0.8254715067571893, "eval_loss": 0.7378228306770325, "eval_runtime": 1563.1126, "eval_samples_per_second": 66.014, "eval_steps_per_second": 4.126, "step": 25353 }, { "epoch": 7.92, "learning_rate": 0.00013871635610766044, "loss": 0.7722, "step": 25500 }, { "epoch": 7.97, "eval_accuracy": 0.8263812114502447, "eval_loss": 0.734555184841156, "eval_runtime": 1563.3264, "eval_samples_per_second": 66.005, "eval_steps_per_second": 4.126, "step": 25666 }, { "epoch": 8.07, "eval_accuracy": 0.8266910048907001, "eval_loss": 0.7316432595252991, "eval_runtime": 1565.1422, "eval_samples_per_second": 65.928, "eval_steps_per_second": 4.121, "step": 25979 }, { "epoch": 8.07, "learning_rate": 0.00012836438923395443, "loss": 0.765, "step": 26000 }, { "epoch": 8.16, "eval_accuracy": 0.8270383479169559, "eval_loss": 0.7292919754981995, "eval_runtime": 1563.9337, "eval_samples_per_second": 65.979, "eval_steps_per_second": 4.124, "step": 26292 }, { "epoch": 8.23, "learning_rate": 0.00011801242236024844, "loss": 0.7584, "step": 26500 }, { "epoch": 8.26, "eval_accuracy": 0.8277969783059648, "eval_loss": 0.7266284227371216, "eval_runtime": 1566.4843, "eval_samples_per_second": 65.872, "eval_steps_per_second": 4.118, "step": 26605 }, { "epoch": 8.36, "eval_accuracy": 0.8280860122956746, "eval_loss": 0.7245298624038696, "eval_runtime": 1564.0929, "eval_samples_per_second": 65.972, "eval_steps_per_second": 4.124, "step": 26918 }, { "epoch": 8.38, "learning_rate": 0.00010766045548654244, "loss": 0.7565, "step": 27000 }, { "epoch": 8.46, "eval_accuracy": 0.8286899161017834, "eval_loss": 0.7209280729293823, "eval_runtime": 1567.5209, "eval_samples_per_second": 65.828, "eval_steps_per_second": 4.115, "step": 27231 }, { "epoch": 8.54, "learning_rate": 9.730848861283643e-05, "loss": 0.7506, "step": 27500 }, { "epoch": 8.55, "eval_accuracy": 0.829094583836786, "eval_loss": 0.7193953990936279, "eval_runtime": 1567.7556, "eval_samples_per_second": 65.818, "eval_steps_per_second": 4.114, "step": 27544 }, { "epoch": 8.65, "eval_accuracy": 0.8296025421755568, "eval_loss": 0.7171857357025146, "eval_runtime": 1567.618, "eval_samples_per_second": 65.824, "eval_steps_per_second": 4.115, "step": 27857 }, { "epoch": 8.69, "learning_rate": 8.695652173913043e-05, "loss": 0.7469, "step": 28000 }, { "epoch": 8.75, "eval_accuracy": 0.8297588118895532, "eval_loss": 0.715788722038269, "eval_runtime": 1568.2621, "eval_samples_per_second": 65.797, "eval_steps_per_second": 4.113, "step": 28170 }, { "epoch": 8.84, "eval_accuracy": 0.8303201783553539, "eval_loss": 0.7129087448120117, "eval_runtime": 1566.51, "eval_samples_per_second": 65.871, "eval_steps_per_second": 4.117, "step": 28483 }, { "epoch": 8.85, "learning_rate": 7.660455486542441e-05, "loss": 0.7434, "step": 28500 }, { "epoch": 8.94, "eval_accuracy": 0.8310723013027939, "eval_loss": 0.7107537984848022, "eval_runtime": 1565.7298, "eval_samples_per_second": 65.903, "eval_steps_per_second": 4.119, "step": 28796 }, { "epoch": 9.01, "learning_rate": 6.625258799171841e-05, "loss": 0.7395, "step": 29000 }, { "epoch": 9.04, "eval_accuracy": 0.8311976533176414, "eval_loss": 0.7103267908096313, "eval_runtime": 1566.6592, "eval_samples_per_second": 65.864, "eval_steps_per_second": 4.117, "step": 29109 }, { "epoch": 9.14, "eval_accuracy": 0.831599738008909, "eval_loss": 0.7076959609985352, "eval_runtime": 1563.475, "eval_samples_per_second": 65.998, "eval_steps_per_second": 4.125, "step": 29422 }, { "epoch": 9.16, "learning_rate": 5.5900621118012414e-05, "loss": 0.7327, "step": 29500 }, { "epoch": 9.23, "eval_accuracy": 0.8318789165507281, "eval_loss": 0.7060185074806213, "eval_runtime": 1563.2277, "eval_samples_per_second": 66.009, "eval_steps_per_second": 4.126, "step": 29735 }, { "epoch": 9.32, "learning_rate": 4.554865424430642e-05, "loss": 0.7294, "step": 30000 }, { "epoch": 9.33, "eval_accuracy": 0.8324941188917658, "eval_loss": 0.7029792070388794, "eval_runtime": 1563.5398, "eval_samples_per_second": 65.996, "eval_steps_per_second": 4.125, "step": 30048 }, { "epoch": 9.43, "eval_accuracy": 0.832804502850609, "eval_loss": 0.7012088298797607, "eval_runtime": 1563.5997, "eval_samples_per_second": 65.993, "eval_steps_per_second": 4.125, "step": 30361 }, { "epoch": 9.47, "learning_rate": 3.5196687370600414e-05, "loss": 0.7277, "step": 30500 }, { "epoch": 9.53, "eval_accuracy": 0.8329869074323458, "eval_loss": 0.7000030875205994, "eval_runtime": 1563.8346, "eval_samples_per_second": 65.983, "eval_steps_per_second": 4.124, "step": 30674 }, { "epoch": 9.62, "eval_accuracy": 0.8334238360274034, "eval_loss": 0.6988800764083862, "eval_runtime": 1563.4183, "eval_samples_per_second": 66.001, "eval_steps_per_second": 4.126, "step": 30987 }, { "epoch": 9.63, "learning_rate": 2.4844720496894407e-05, "loss": 0.7237, "step": 31000 }, { "epoch": 9.72, "eval_accuracy": 0.8335258711989345, "eval_loss": 0.6981679201126099, "eval_runtime": 1564.2656, "eval_samples_per_second": 65.965, "eval_steps_per_second": 4.123, "step": 31300 }, { "epoch": 9.78, "learning_rate": 1.4492753623188405e-05, "loss": 0.7233, "step": 31500 }, { "epoch": 9.82, "eval_accuracy": 0.8338284368943191, "eval_loss": 0.6956919431686401, "eval_runtime": 1565.7206, "eval_samples_per_second": 65.904, "eval_steps_per_second": 4.12, "step": 31613 }, { "epoch": 9.91, "eval_accuracy": 0.8342045312252818, "eval_loss": 0.6948868632316589, "eval_runtime": 1564.1555, "eval_samples_per_second": 65.97, "eval_steps_per_second": 4.124, "step": 31926 }, { "epoch": 9.94, "learning_rate": 4.140786749482401e-06, "loss": 0.7196, "step": 32000 }, { "epoch": 10.0, "step": 32200, "total_flos": 2.1701190649823816e+18, "train_loss": 1.3247782011802152, "train_runtime": 514233.1245, "train_samples_per_second": 16.031, "train_steps_per_second": 0.063 } ], "max_steps": 32200, "num_train_epochs": 10, "total_flos": 2.1701190649823816e+18, "trial_name": null, "trial_params": null }