{ "best_metric": 3.9793689250946045, "best_model_checkpoint": "output_hemo_neg_3/checkpoint-18392", "epoch": 500.0, "eval_steps": 500, "global_step": 19000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 9.98e-07, "loss": 5.9415, "step": 38 }, { "epoch": 1.0, "eval_accuracy": 0.30840664711632454, "eval_loss": 5.606841087341309, "eval_runtime": 0.5994, "eval_samples_per_second": 6.673, "eval_steps_per_second": 1.668, "step": 38 }, { "epoch": 2.0, "learning_rate": 9.959999999999999e-07, "loss": 5.7302, "step": 76 }, { "epoch": 2.0, "eval_accuracy": 0.32038123167155425, "eval_loss": 5.426336288452148, "eval_runtime": 0.6035, "eval_samples_per_second": 6.628, "eval_steps_per_second": 1.657, "step": 76 }, { "epoch": 3.0, "learning_rate": 9.94e-07, "loss": 5.5675, "step": 114 }, { "epoch": 3.0, "eval_accuracy": 0.323069403714565, "eval_loss": 5.287517070770264, "eval_runtime": 0.6089, "eval_samples_per_second": 6.57, "eval_steps_per_second": 1.642, "step": 114 }, { "epoch": 4.0, "learning_rate": 9.92e-07, "loss": 5.4594, "step": 152 }, { "epoch": 4.0, "eval_accuracy": 0.3250244379276637, "eval_loss": 5.205501556396484, "eval_runtime": 0.6097, "eval_samples_per_second": 6.56, "eval_steps_per_second": 1.64, "step": 152 }, { "epoch": 5.0, "learning_rate": 9.9e-07, "loss": 5.3808, "step": 190 }, { "epoch": 5.0, "eval_accuracy": 0.3296676441837732, "eval_loss": 5.158883094787598, "eval_runtime": 0.6099, "eval_samples_per_second": 6.558, "eval_steps_per_second": 1.639, "step": 190 }, { "epoch": 6.0, "learning_rate": 9.88e-07, "loss": 5.3353, "step": 228 }, { "epoch": 6.0, "eval_accuracy": 0.3321114369501466, "eval_loss": 5.119546413421631, "eval_runtime": 0.6108, "eval_samples_per_second": 6.549, "eval_steps_per_second": 1.637, "step": 228 }, { "epoch": 7.0, "learning_rate": 9.86e-07, "loss": 5.2946, "step": 266 }, { "epoch": 7.0, "eval_accuracy": 0.333822091886608, "eval_loss": 5.077916622161865, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 266 }, { "epoch": 8.0, "learning_rate": 9.84e-07, "loss": 5.2632, "step": 304 }, { "epoch": 8.0, "eval_accuracy": 0.33699902248289343, "eval_loss": 5.043184280395508, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 304 }, { "epoch": 9.0, "learning_rate": 9.819999999999999e-07, "loss": 5.2279, "step": 342 }, { "epoch": 9.0, "eval_accuracy": 0.33724340175953077, "eval_loss": 5.015382766723633, "eval_runtime": 0.6122, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.634, "step": 342 }, { "epoch": 10.0, "learning_rate": 9.8e-07, "loss": 5.1999, "step": 380 }, { "epoch": 10.0, "eval_accuracy": 0.33773216031280545, "eval_loss": 4.993128299713135, "eval_runtime": 0.612, "eval_samples_per_second": 6.535, "eval_steps_per_second": 1.634, "step": 380 }, { "epoch": 11.0, "learning_rate": 9.78e-07, "loss": 5.1853, "step": 418 }, { "epoch": 11.0, "eval_accuracy": 0.3399315738025415, "eval_loss": 4.970090389251709, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 418 }, { "epoch": 12.0, "learning_rate": 9.759999999999998e-07, "loss": 5.1619, "step": 456 }, { "epoch": 12.0, "eval_accuracy": 0.3428641251221896, "eval_loss": 4.94577693939209, "eval_runtime": 0.6122, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.634, "step": 456 }, { "epoch": 13.0, "learning_rate": 9.74e-07, "loss": 5.1395, "step": 494 }, { "epoch": 13.0, "eval_accuracy": 0.34384164222873903, "eval_loss": 4.927363395690918, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 494 }, { "epoch": 14.0, "learning_rate": 9.72e-07, "loss": 5.1179, "step": 532 }, { "epoch": 14.0, "eval_accuracy": 0.34628543499511244, "eval_loss": 4.908015251159668, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 532 }, { "epoch": 15.0, "learning_rate": 9.7e-07, "loss": 5.1048, "step": 570 }, { "epoch": 15.0, "eval_accuracy": 0.3465298142717498, "eval_loss": 4.892131805419922, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 570 }, { "epoch": 16.0, "learning_rate": 9.679999999999999e-07, "loss": 5.0837, "step": 608 }, { "epoch": 16.0, "eval_accuracy": 0.34701857282502446, "eval_loss": 4.875644683837891, "eval_runtime": 0.6132, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 608 }, { "epoch": 17.0, "learning_rate": 9.66e-07, "loss": 5.067, "step": 646 }, { "epoch": 17.0, "eval_accuracy": 0.34921798631476053, "eval_loss": 4.860612869262695, "eval_runtime": 0.6131, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 646 }, { "epoch": 18.0, "learning_rate": 9.64e-07, "loss": 5.0516, "step": 684 }, { "epoch": 18.0, "eval_accuracy": 0.3506842619745846, "eval_loss": 4.846870422363281, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 684 }, { "epoch": 19.0, "learning_rate": 9.619999999999999e-07, "loss": 5.0313, "step": 722 }, { "epoch": 19.0, "eval_accuracy": 0.3521505376344086, "eval_loss": 4.836608409881592, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 722 }, { "epoch": 20.0, "learning_rate": 9.6e-07, "loss": 5.0225, "step": 760 }, { "epoch": 20.0, "eval_accuracy": 0.3526392961876833, "eval_loss": 4.827553749084473, "eval_runtime": 0.614, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 760 }, { "epoch": 21.0, "learning_rate": 9.58e-07, "loss": 5.0068, "step": 798 }, { "epoch": 21.0, "eval_accuracy": 0.3521505376344086, "eval_loss": 4.817898273468018, "eval_runtime": 0.6125, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.633, "step": 798 }, { "epoch": 22.0, "learning_rate": 9.559999999999998e-07, "loss": 4.9942, "step": 836 }, { "epoch": 22.0, "eval_accuracy": 0.3521505376344086, "eval_loss": 4.805068016052246, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 836 }, { "epoch": 23.0, "learning_rate": 9.539999999999999e-07, "loss": 4.9758, "step": 874 }, { "epoch": 23.0, "eval_accuracy": 0.3526392961876833, "eval_loss": 4.796260356903076, "eval_runtime": 0.6145, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 874 }, { "epoch": 24.0, "learning_rate": 9.52e-07, "loss": 4.9605, "step": 912 }, { "epoch": 24.0, "eval_accuracy": 0.35288367546432065, "eval_loss": 4.78426456451416, "eval_runtime": 0.6125, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.633, "step": 912 }, { "epoch": 25.0, "learning_rate": 9.499999999999999e-07, "loss": 4.9525, "step": 950 }, { "epoch": 25.0, "eval_accuracy": 0.353128054740958, "eval_loss": 4.772826671600342, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 950 }, { "epoch": 26.0, "learning_rate": 9.479999999999999e-07, "loss": 4.9409, "step": 988 }, { "epoch": 26.0, "eval_accuracy": 0.35239491691104596, "eval_loss": 4.761840343475342, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.63, "step": 988 }, { "epoch": 27.0, "learning_rate": 9.459999999999999e-07, "loss": 4.9328, "step": 1026 }, { "epoch": 27.0, "eval_accuracy": 0.3519061583577713, "eval_loss": 4.75234317779541, "eval_runtime": 0.6227, "eval_samples_per_second": 6.424, "eval_steps_per_second": 1.606, "step": 1026 }, { "epoch": 28.0, "learning_rate": 9.439999999999999e-07, "loss": 4.9168, "step": 1064 }, { "epoch": 28.0, "eval_accuracy": 0.3526392961876833, "eval_loss": 4.744428634643555, "eval_runtime": 0.6133, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 1064 }, { "epoch": 29.0, "learning_rate": 9.419999999999999e-07, "loss": 4.9057, "step": 1102 }, { "epoch": 29.0, "eval_accuracy": 0.3550830889540567, "eval_loss": 4.733183860778809, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 1102 }, { "epoch": 30.0, "learning_rate": 9.399999999999999e-07, "loss": 4.8896, "step": 1140 }, { "epoch": 30.0, "eval_accuracy": 0.3560606060606061, "eval_loss": 4.723690986633301, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 1140 }, { "epoch": 31.0, "learning_rate": 9.379999999999998e-07, "loss": 4.8869, "step": 1178 }, { "epoch": 31.0, "eval_accuracy": 0.35654936461388076, "eval_loss": 4.715620994567871, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 1178 }, { "epoch": 32.0, "learning_rate": 9.36e-07, "loss": 4.8798, "step": 1216 }, { "epoch": 32.0, "eval_accuracy": 0.3567937438905181, "eval_loss": 4.709283828735352, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 1216 }, { "epoch": 33.0, "learning_rate": 9.34e-07, "loss": 4.8591, "step": 1254 }, { "epoch": 33.0, "eval_accuracy": 0.3575268817204301, "eval_loss": 4.702914714813232, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 1254 }, { "epoch": 34.0, "learning_rate": 9.32e-07, "loss": 4.8548, "step": 1292 }, { "epoch": 34.0, "eval_accuracy": 0.35703812316715544, "eval_loss": 4.694584369659424, "eval_runtime": 0.6143, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 1292 }, { "epoch": 35.0, "learning_rate": 9.3e-07, "loss": 4.8502, "step": 1330 }, { "epoch": 35.0, "eval_accuracy": 0.35948191593352885, "eval_loss": 4.687127590179443, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 1330 }, { "epoch": 36.0, "learning_rate": 9.28e-07, "loss": 4.8378, "step": 1368 }, { "epoch": 36.0, "eval_accuracy": 0.35948191593352885, "eval_loss": 4.680301189422607, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 1368 }, { "epoch": 37.0, "learning_rate": 9.26e-07, "loss": 4.829, "step": 1406 }, { "epoch": 37.0, "eval_accuracy": 0.35997067448680353, "eval_loss": 4.673268795013428, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 1406 }, { "epoch": 38.0, "learning_rate": 9.24e-07, "loss": 4.8177, "step": 1444 }, { "epoch": 38.0, "eval_accuracy": 0.3602150537634409, "eval_loss": 4.66432523727417, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 1444 }, { "epoch": 39.0, "learning_rate": 9.22e-07, "loss": 4.809, "step": 1482 }, { "epoch": 39.0, "eval_accuracy": 0.36070381231671556, "eval_loss": 4.6591081619262695, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 1482 }, { "epoch": 40.0, "learning_rate": 9.2e-07, "loss": 4.8002, "step": 1520 }, { "epoch": 40.0, "eval_accuracy": 0.36070381231671556, "eval_loss": 4.650698661804199, "eval_runtime": 0.6143, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 1520 }, { "epoch": 41.0, "learning_rate": 9.18e-07, "loss": 4.7938, "step": 1558 }, { "epoch": 41.0, "eval_accuracy": 0.3614369501466276, "eval_loss": 4.643824577331543, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 1558 }, { "epoch": 42.0, "learning_rate": 9.16e-07, "loss": 4.7787, "step": 1596 }, { "epoch": 42.0, "eval_accuracy": 0.3616813294232649, "eval_loss": 4.636685848236084, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 1596 }, { "epoch": 43.0, "learning_rate": 9.14e-07, "loss": 4.7685, "step": 1634 }, { "epoch": 43.0, "eval_accuracy": 0.3629032258064516, "eval_loss": 4.630648136138916, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 1634 }, { "epoch": 44.0, "learning_rate": 9.12e-07, "loss": 4.762, "step": 1672 }, { "epoch": 44.0, "eval_accuracy": 0.36363636363636365, "eval_loss": 4.621088981628418, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 1672 }, { "epoch": 45.0, "learning_rate": 9.1e-07, "loss": 4.7487, "step": 1710 }, { "epoch": 45.0, "eval_accuracy": 0.36412512218963833, "eval_loss": 4.61327600479126, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.63, "step": 1710 }, { "epoch": 46.0, "learning_rate": 9.08e-07, "loss": 4.7451, "step": 1748 }, { "epoch": 46.0, "eval_accuracy": 0.364613880742913, "eval_loss": 4.605830669403076, "eval_runtime": 0.6144, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 1748 }, { "epoch": 47.0, "learning_rate": 9.06e-07, "loss": 4.7378, "step": 1786 }, { "epoch": 47.0, "eval_accuracy": 0.3658357771260997, "eval_loss": 4.600909233093262, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 1786 }, { "epoch": 48.0, "learning_rate": 9.039999999999999e-07, "loss": 4.7281, "step": 1824 }, { "epoch": 48.0, "eval_accuracy": 0.3658357771260997, "eval_loss": 4.5931782722473145, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 1824 }, { "epoch": 49.0, "learning_rate": 9.02e-07, "loss": 4.7196, "step": 1862 }, { "epoch": 49.0, "eval_accuracy": 0.3655913978494624, "eval_loss": 4.5888590812683105, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 1862 }, { "epoch": 50.0, "learning_rate": 9e-07, "loss": 4.7091, "step": 1900 }, { "epoch": 50.0, "eval_accuracy": 0.36656891495601174, "eval_loss": 4.581442356109619, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 1900 }, { "epoch": 51.0, "learning_rate": 8.98e-07, "loss": 4.7032, "step": 1938 }, { "epoch": 51.0, "eval_accuracy": 0.3668132942326491, "eval_loss": 4.5762939453125, "eval_runtime": 0.6259, "eval_samples_per_second": 6.391, "eval_steps_per_second": 1.598, "step": 1938 }, { "epoch": 52.0, "learning_rate": 8.96e-07, "loss": 4.6978, "step": 1976 }, { "epoch": 52.0, "eval_accuracy": 0.3668132942326491, "eval_loss": 4.573066711425781, "eval_runtime": 0.6125, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.633, "step": 1976 }, { "epoch": 53.0, "learning_rate": 8.939999999999999e-07, "loss": 4.6908, "step": 2014 }, { "epoch": 53.0, "eval_accuracy": 0.36730205278592376, "eval_loss": 4.5681657791137695, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 2014 }, { "epoch": 54.0, "learning_rate": 8.92e-07, "loss": 4.6776, "step": 2052 }, { "epoch": 54.0, "eval_accuracy": 0.36730205278592376, "eval_loss": 4.56380558013916, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 2052 }, { "epoch": 55.0, "learning_rate": 8.9e-07, "loss": 4.6667, "step": 2090 }, { "epoch": 55.0, "eval_accuracy": 0.3680351906158358, "eval_loss": 4.558794975280762, "eval_runtime": 0.614, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 2090 }, { "epoch": 56.0, "learning_rate": 8.88e-07, "loss": 4.6662, "step": 2128 }, { "epoch": 56.0, "eval_accuracy": 0.36852394916911047, "eval_loss": 4.5535197257995605, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 2128 }, { "epoch": 57.0, "learning_rate": 8.86e-07, "loss": 4.6567, "step": 2166 }, { "epoch": 57.0, "eval_accuracy": 0.36974584555229717, "eval_loss": 4.549376964569092, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 2166 }, { "epoch": 58.0, "learning_rate": 8.839999999999999e-07, "loss": 4.6492, "step": 2204 }, { "epoch": 58.0, "eval_accuracy": 0.36974584555229717, "eval_loss": 4.543338298797607, "eval_runtime": 0.6159, "eval_samples_per_second": 6.494, "eval_steps_per_second": 1.624, "step": 2204 }, { "epoch": 59.0, "learning_rate": 8.82e-07, "loss": 4.6442, "step": 2242 }, { "epoch": 59.0, "eval_accuracy": 0.36974584555229717, "eval_loss": 4.5420732498168945, "eval_runtime": 0.6226, "eval_samples_per_second": 6.424, "eval_steps_per_second": 1.606, "step": 2242 }, { "epoch": 60.0, "learning_rate": 8.799999999999999e-07, "loss": 4.632, "step": 2280 }, { "epoch": 60.0, "eval_accuracy": 0.3699902248289345, "eval_loss": 4.5368475914001465, "eval_runtime": 0.615, "eval_samples_per_second": 6.504, "eval_steps_per_second": 1.626, "step": 2280 }, { "epoch": 61.0, "learning_rate": 8.78e-07, "loss": 4.6256, "step": 2318 }, { "epoch": 61.0, "eval_accuracy": 0.3704789833822092, "eval_loss": 4.532083511352539, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 2318 }, { "epoch": 62.0, "learning_rate": 8.76e-07, "loss": 4.6215, "step": 2356 }, { "epoch": 62.0, "eval_accuracy": 0.3699902248289345, "eval_loss": 4.528621673583984, "eval_runtime": 0.6132, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 2356 }, { "epoch": 63.0, "learning_rate": 8.739999999999999e-07, "loss": 4.6142, "step": 2394 }, { "epoch": 63.0, "eval_accuracy": 0.37023460410557185, "eval_loss": 4.524003982543945, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 2394 }, { "epoch": 64.0, "learning_rate": 8.72e-07, "loss": 4.6041, "step": 2432 }, { "epoch": 64.0, "eval_accuracy": 0.3709677419354839, "eval_loss": 4.519542694091797, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 2432 }, { "epoch": 65.0, "learning_rate": 8.699999999999999e-07, "loss": 4.5984, "step": 2470 }, { "epoch": 65.0, "eval_accuracy": 0.37145650048875856, "eval_loss": 4.514742851257324, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 2470 }, { "epoch": 66.0, "learning_rate": 8.68e-07, "loss": 4.5919, "step": 2508 }, { "epoch": 66.0, "eval_accuracy": 0.37267839687194526, "eval_loss": 4.511608600616455, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 2508 }, { "epoch": 67.0, "learning_rate": 8.659999999999999e-07, "loss": 4.5838, "step": 2546 }, { "epoch": 67.0, "eval_accuracy": 0.3724340175953079, "eval_loss": 4.5069780349731445, "eval_runtime": 0.6152, "eval_samples_per_second": 6.502, "eval_steps_per_second": 1.626, "step": 2546 }, { "epoch": 68.0, "learning_rate": 8.639999999999999e-07, "loss": 4.5733, "step": 2584 }, { "epoch": 68.0, "eval_accuracy": 0.3724340175953079, "eval_loss": 4.503517150878906, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 2584 }, { "epoch": 69.0, "learning_rate": 8.62e-07, "loss": 4.5642, "step": 2622 }, { "epoch": 69.0, "eval_accuracy": 0.3721896383186706, "eval_loss": 4.500667095184326, "eval_runtime": 0.6248, "eval_samples_per_second": 6.402, "eval_steps_per_second": 1.6, "step": 2622 }, { "epoch": 70.0, "learning_rate": 8.599999999999999e-07, "loss": 4.5607, "step": 2660 }, { "epoch": 70.0, "eval_accuracy": 0.37194525904203324, "eval_loss": 4.4967780113220215, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627, "step": 2660 }, { "epoch": 71.0, "learning_rate": 8.58e-07, "loss": 4.5543, "step": 2698 }, { "epoch": 71.0, "eval_accuracy": 0.3729227761485826, "eval_loss": 4.492751121520996, "eval_runtime": 0.615, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 2698 }, { "epoch": 72.0, "learning_rate": 8.559999999999999e-07, "loss": 4.5502, "step": 2736 }, { "epoch": 72.0, "eval_accuracy": 0.3729227761485826, "eval_loss": 4.489741325378418, "eval_runtime": 0.6243, "eval_samples_per_second": 6.407, "eval_steps_per_second": 1.602, "step": 2736 }, { "epoch": 73.0, "learning_rate": 8.539999999999999e-07, "loss": 4.5505, "step": 2774 }, { "epoch": 73.0, "eval_accuracy": 0.3736559139784946, "eval_loss": 4.487486839294434, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 2774 }, { "epoch": 74.0, "learning_rate": 8.52e-07, "loss": 4.537, "step": 2812 }, { "epoch": 74.0, "eval_accuracy": 0.37316715542521994, "eval_loss": 4.483956813812256, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 2812 }, { "epoch": 75.0, "learning_rate": 8.499999999999999e-07, "loss": 4.529, "step": 2850 }, { "epoch": 75.0, "eval_accuracy": 0.374633431085044, "eval_loss": 4.480215549468994, "eval_runtime": 0.6128, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 2850 }, { "epoch": 76.0, "learning_rate": 8.48e-07, "loss": 4.5201, "step": 2888 }, { "epoch": 76.0, "eval_accuracy": 0.37487781036168133, "eval_loss": 4.4763689041137695, "eval_runtime": 0.614, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 2888 }, { "epoch": 77.0, "learning_rate": 8.459999999999999e-07, "loss": 4.5176, "step": 2926 }, { "epoch": 77.0, "eval_accuracy": 0.37512218963831867, "eval_loss": 4.472899436950684, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 2926 }, { "epoch": 78.0, "learning_rate": 8.439999999999999e-07, "loss": 4.5087, "step": 2964 }, { "epoch": 78.0, "eval_accuracy": 0.37512218963831867, "eval_loss": 4.4715986251831055, "eval_runtime": 0.627, "eval_samples_per_second": 6.379, "eval_steps_per_second": 1.595, "step": 2964 }, { "epoch": 79.0, "learning_rate": 8.419999999999999e-07, "loss": 4.504, "step": 3002 }, { "epoch": 79.0, "eval_accuracy": 0.37438905180840665, "eval_loss": 4.468360900878906, "eval_runtime": 0.6153, "eval_samples_per_second": 6.501, "eval_steps_per_second": 1.625, "step": 3002 }, { "epoch": 80.0, "learning_rate": 8.399999999999999e-07, "loss": 4.4914, "step": 3040 }, { "epoch": 80.0, "eval_accuracy": 0.37512218963831867, "eval_loss": 4.463363170623779, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 3040 }, { "epoch": 81.0, "learning_rate": 8.38e-07, "loss": 4.4907, "step": 3078 }, { "epoch": 81.0, "eval_accuracy": 0.37512218963831867, "eval_loss": 4.461572170257568, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 3078 }, { "epoch": 82.0, "learning_rate": 8.359999999999999e-07, "loss": 4.483, "step": 3116 }, { "epoch": 82.0, "eval_accuracy": 0.375366568914956, "eval_loss": 4.45780086517334, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 3116 }, { "epoch": 83.0, "learning_rate": 8.34e-07, "loss": 4.4792, "step": 3154 }, { "epoch": 83.0, "eval_accuracy": 0.3741446725317693, "eval_loss": 4.454073429107666, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 3154 }, { "epoch": 84.0, "learning_rate": 8.319999999999999e-07, "loss": 4.4705, "step": 3192 }, { "epoch": 84.0, "eval_accuracy": 0.37438905180840665, "eval_loss": 4.451131820678711, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 3192 }, { "epoch": 85.0, "learning_rate": 8.299999999999999e-07, "loss": 4.4647, "step": 3230 }, { "epoch": 85.0, "eval_accuracy": 0.37487781036168133, "eval_loss": 4.448835372924805, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 3230 }, { "epoch": 86.0, "learning_rate": 8.28e-07, "loss": 4.4617, "step": 3268 }, { "epoch": 86.0, "eval_accuracy": 0.37512218963831867, "eval_loss": 4.444460391998291, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 3268 }, { "epoch": 87.0, "learning_rate": 8.259999999999999e-07, "loss": 4.453, "step": 3306 }, { "epoch": 87.0, "eval_accuracy": 0.37512218963831867, "eval_loss": 4.438481330871582, "eval_runtime": 0.6245, "eval_samples_per_second": 6.405, "eval_steps_per_second": 1.601, "step": 3306 }, { "epoch": 88.0, "learning_rate": 8.24e-07, "loss": 4.4488, "step": 3344 }, { "epoch": 88.0, "eval_accuracy": 0.3763440860215054, "eval_loss": 4.435319423675537, "eval_runtime": 0.6159, "eval_samples_per_second": 6.494, "eval_steps_per_second": 1.624, "step": 3344 }, { "epoch": 89.0, "learning_rate": 8.219999999999999e-07, "loss": 4.4424, "step": 3382 }, { "epoch": 89.0, "eval_accuracy": 0.3765884652981427, "eval_loss": 4.432227611541748, "eval_runtime": 0.6169, "eval_samples_per_second": 6.484, "eval_steps_per_second": 1.621, "step": 3382 }, { "epoch": 90.0, "learning_rate": 8.199999999999999e-07, "loss": 4.433, "step": 3420 }, { "epoch": 90.0, "eval_accuracy": 0.3765884652981427, "eval_loss": 4.4299702644348145, "eval_runtime": 0.6255, "eval_samples_per_second": 6.395, "eval_steps_per_second": 1.599, "step": 3420 }, { "epoch": 91.0, "learning_rate": 8.179999999999999e-07, "loss": 4.4252, "step": 3458 }, { "epoch": 91.0, "eval_accuracy": 0.3763440860215054, "eval_loss": 4.425891399383545, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 3458 }, { "epoch": 92.0, "learning_rate": 8.159999999999999e-07, "loss": 4.4226, "step": 3496 }, { "epoch": 92.0, "eval_accuracy": 0.37732160312805474, "eval_loss": 4.421455383300781, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 3496 }, { "epoch": 93.0, "learning_rate": 8.14e-07, "loss": 4.4144, "step": 3534 }, { "epoch": 93.0, "eval_accuracy": 0.3770772238514174, "eval_loss": 4.41888427734375, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 3534 }, { "epoch": 94.0, "learning_rate": 8.12e-07, "loss": 4.4047, "step": 3572 }, { "epoch": 94.0, "eval_accuracy": 0.3770772238514174, "eval_loss": 4.416011333465576, "eval_runtime": 0.6125, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.633, "step": 3572 }, { "epoch": 95.0, "learning_rate": 8.1e-07, "loss": 4.4071, "step": 3610 }, { "epoch": 95.0, "eval_accuracy": 0.37732160312805474, "eval_loss": 4.413094997406006, "eval_runtime": 0.6249, "eval_samples_per_second": 6.401, "eval_steps_per_second": 1.6, "step": 3610 }, { "epoch": 96.0, "learning_rate": 8.08e-07, "loss": 4.3975, "step": 3648 }, { "epoch": 96.0, "eval_accuracy": 0.37732160312805474, "eval_loss": 4.409505367279053, "eval_runtime": 0.6123, "eval_samples_per_second": 6.532, "eval_steps_per_second": 1.633, "step": 3648 }, { "epoch": 97.0, "learning_rate": 8.06e-07, "loss": 4.3897, "step": 3686 }, { "epoch": 97.0, "eval_accuracy": 0.3770772238514174, "eval_loss": 4.408539772033691, "eval_runtime": 1.9183, "eval_samples_per_second": 2.085, "eval_steps_per_second": 0.521, "step": 3686 }, { "epoch": 98.0, "learning_rate": 8.04e-07, "loss": 4.3869, "step": 3724 }, { "epoch": 98.0, "eval_accuracy": 0.3770772238514174, "eval_loss": 4.405216693878174, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 3724 }, { "epoch": 99.0, "learning_rate": 8.02e-07, "loss": 4.3751, "step": 3762 }, { "epoch": 99.0, "eval_accuracy": 0.37732160312805474, "eval_loss": 4.402120113372803, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 3762 }, { "epoch": 100.0, "learning_rate": 8e-07, "loss": 4.3698, "step": 3800 }, { "epoch": 100.0, "eval_accuracy": 0.37683284457478006, "eval_loss": 4.398764610290527, "eval_runtime": 0.6238, "eval_samples_per_second": 6.412, "eval_steps_per_second": 1.603, "step": 3800 }, { "epoch": 101.0, "learning_rate": 7.98e-07, "loss": 4.368, "step": 3838 }, { "epoch": 101.0, "eval_accuracy": 0.37683284457478006, "eval_loss": 4.394458293914795, "eval_runtime": 0.6261, "eval_samples_per_second": 6.388, "eval_steps_per_second": 1.597, "step": 3838 }, { "epoch": 102.0, "learning_rate": 7.96e-07, "loss": 4.3643, "step": 3876 }, { "epoch": 102.0, "eval_accuracy": 0.3770772238514174, "eval_loss": 4.391842842102051, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 3876 }, { "epoch": 103.0, "learning_rate": 7.94e-07, "loss": 4.3552, "step": 3914 }, { "epoch": 103.0, "eval_accuracy": 0.3765884652981427, "eval_loss": 4.389264106750488, "eval_runtime": 0.6137, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 3914 }, { "epoch": 104.0, "learning_rate": 7.92e-07, "loss": 4.3478, "step": 3952 }, { "epoch": 104.0, "eval_accuracy": 0.3775659824046921, "eval_loss": 4.386912822723389, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 3952 }, { "epoch": 105.0, "learning_rate": 7.9e-07, "loss": 4.3438, "step": 3990 }, { "epoch": 105.0, "eval_accuracy": 0.37805474095796676, "eval_loss": 4.3847856521606445, "eval_runtime": 0.6142, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 3990 }, { "epoch": 106.0, "learning_rate": 7.88e-07, "loss": 4.3362, "step": 4028 }, { "epoch": 106.0, "eval_accuracy": 0.37732160312805474, "eval_loss": 4.38198184967041, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 4028 }, { "epoch": 107.0, "learning_rate": 7.86e-07, "loss": 4.3356, "step": 4066 }, { "epoch": 107.0, "eval_accuracy": 0.3778103616813294, "eval_loss": 4.37683629989624, "eval_runtime": 0.6124, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 4066 }, { "epoch": 108.0, "learning_rate": 7.84e-07, "loss": 4.3263, "step": 4104 }, { "epoch": 108.0, "eval_accuracy": 0.3775659824046921, "eval_loss": 4.376446723937988, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 4104 }, { "epoch": 109.0, "learning_rate": 7.82e-07, "loss": 4.3238, "step": 4142 }, { "epoch": 109.0, "eval_accuracy": 0.3778103616813294, "eval_loss": 4.373225688934326, "eval_runtime": 0.6125, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 4142 }, { "epoch": 110.0, "learning_rate": 7.799999999999999e-07, "loss": 4.3157, "step": 4180 }, { "epoch": 110.0, "eval_accuracy": 0.37805474095796676, "eval_loss": 4.369943618774414, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 4180 }, { "epoch": 111.0, "learning_rate": 7.78e-07, "loss": 4.311, "step": 4218 }, { "epoch": 111.0, "eval_accuracy": 0.37805474095796676, "eval_loss": 4.367816925048828, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 4218 }, { "epoch": 112.0, "learning_rate": 7.76e-07, "loss": 4.3048, "step": 4256 }, { "epoch": 112.0, "eval_accuracy": 0.3787878787878788, "eval_loss": 4.364564895629883, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 4256 }, { "epoch": 113.0, "learning_rate": 7.74e-07, "loss": 4.2955, "step": 4294 }, { "epoch": 113.0, "eval_accuracy": 0.37927663734115347, "eval_loss": 4.364035606384277, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 4294 }, { "epoch": 114.0, "learning_rate": 7.72e-07, "loss": 4.2914, "step": 4332 }, { "epoch": 114.0, "eval_accuracy": 0.37927663734115347, "eval_loss": 4.360426425933838, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 4332 }, { "epoch": 115.0, "learning_rate": 7.699999999999999e-07, "loss": 4.286, "step": 4370 }, { "epoch": 115.0, "eval_accuracy": 0.3790322580645161, "eval_loss": 4.3580002784729, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 4370 }, { "epoch": 116.0, "learning_rate": 7.68e-07, "loss": 4.2857, "step": 4408 }, { "epoch": 116.0, "eval_accuracy": 0.3790322580645161, "eval_loss": 4.354123115539551, "eval_runtime": 0.6123, "eval_samples_per_second": 6.533, "eval_steps_per_second": 1.633, "step": 4408 }, { "epoch": 117.0, "learning_rate": 7.66e-07, "loss": 4.2776, "step": 4446 }, { "epoch": 117.0, "eval_accuracy": 0.37927663734115347, "eval_loss": 4.352733612060547, "eval_runtime": 0.6122, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.633, "step": 4446 }, { "epoch": 118.0, "learning_rate": 7.64e-07, "loss": 4.2734, "step": 4484 }, { "epoch": 118.0, "eval_accuracy": 0.38025415444770283, "eval_loss": 4.348194599151611, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 4484 }, { "epoch": 119.0, "learning_rate": 7.62e-07, "loss": 4.2646, "step": 4522 }, { "epoch": 119.0, "eval_accuracy": 0.3800097751710655, "eval_loss": 4.346100330352783, "eval_runtime": 0.7901, "eval_samples_per_second": 5.062, "eval_steps_per_second": 1.266, "step": 4522 }, { "epoch": 120.0, "learning_rate": 7.599999999999999e-07, "loss": 4.2632, "step": 4560 }, { "epoch": 120.0, "eval_accuracy": 0.38025415444770283, "eval_loss": 4.3445892333984375, "eval_runtime": 0.6129, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 4560 }, { "epoch": 121.0, "learning_rate": 7.58e-07, "loss": 4.2586, "step": 4598 }, { "epoch": 121.0, "eval_accuracy": 0.3807429130009775, "eval_loss": 4.340865135192871, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 4598 }, { "epoch": 122.0, "learning_rate": 7.559999999999999e-07, "loss": 4.2564, "step": 4636 }, { "epoch": 122.0, "eval_accuracy": 0.3812316715542522, "eval_loss": 4.3399505615234375, "eval_runtime": 0.6124, "eval_samples_per_second": 6.532, "eval_steps_per_second": 1.633, "step": 4636 }, { "epoch": 123.0, "learning_rate": 7.54e-07, "loss": 4.2423, "step": 4674 }, { "epoch": 123.0, "eval_accuracy": 0.3807429130009775, "eval_loss": 4.335657596588135, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 4674 }, { "epoch": 124.0, "learning_rate": 7.52e-07, "loss": 4.2425, "step": 4712 }, { "epoch": 124.0, "eval_accuracy": 0.3807429130009775, "eval_loss": 4.3334856033325195, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 4712 }, { "epoch": 125.0, "learning_rate": 7.5e-07, "loss": 4.2367, "step": 4750 }, { "epoch": 125.0, "eval_accuracy": 0.38098729227761485, "eval_loss": 4.330577373504639, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 4750 }, { "epoch": 126.0, "learning_rate": 7.48e-07, "loss": 4.2301, "step": 4788 }, { "epoch": 126.0, "eval_accuracy": 0.38147605083088953, "eval_loss": 4.3291544914245605, "eval_runtime": 0.6122, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.633, "step": 4788 }, { "epoch": 127.0, "learning_rate": 7.459999999999999e-07, "loss": 4.2286, "step": 4826 }, { "epoch": 127.0, "eval_accuracy": 0.3812316715542522, "eval_loss": 4.327591419219971, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 4826 }, { "epoch": 128.0, "learning_rate": 7.44e-07, "loss": 4.2184, "step": 4864 }, { "epoch": 128.0, "eval_accuracy": 0.38220918866080156, "eval_loss": 4.32462215423584, "eval_runtime": 0.6129, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 4864 }, { "epoch": 129.0, "learning_rate": 7.42e-07, "loss": 4.2156, "step": 4902 }, { "epoch": 129.0, "eval_accuracy": 0.38269794721407624, "eval_loss": 4.3210039138793945, "eval_runtime": 0.6123, "eval_samples_per_second": 6.533, "eval_steps_per_second": 1.633, "step": 4902 }, { "epoch": 130.0, "learning_rate": 7.4e-07, "loss": 4.2116, "step": 4940 }, { "epoch": 130.0, "eval_accuracy": 0.38343108504398826, "eval_loss": 4.318737506866455, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 4940 }, { "epoch": 131.0, "learning_rate": 7.38e-07, "loss": 4.2008, "step": 4978 }, { "epoch": 131.0, "eval_accuracy": 0.38343108504398826, "eval_loss": 4.316496849060059, "eval_runtime": 0.6128, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 4978 }, { "epoch": 132.0, "learning_rate": 7.359999999999999e-07, "loss": 4.1995, "step": 5016 }, { "epoch": 132.0, "eval_accuracy": 0.38343108504398826, "eval_loss": 4.3134074211120605, "eval_runtime": 0.6223, "eval_samples_per_second": 6.428, "eval_steps_per_second": 1.607, "step": 5016 }, { "epoch": 133.0, "learning_rate": 7.34e-07, "loss": 4.19, "step": 5054 }, { "epoch": 133.0, "eval_accuracy": 0.3841642228739003, "eval_loss": 4.313587665557861, "eval_runtime": 0.6139, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 5054 }, { "epoch": 134.0, "learning_rate": 7.319999999999999e-07, "loss": 4.1828, "step": 5092 }, { "epoch": 134.0, "eval_accuracy": 0.3841642228739003, "eval_loss": 4.311624050140381, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 5092 }, { "epoch": 135.0, "learning_rate": 7.3e-07, "loss": 4.1815, "step": 5130 }, { "epoch": 135.0, "eval_accuracy": 0.38465298142717497, "eval_loss": 4.306524276733398, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 5130 }, { "epoch": 136.0, "learning_rate": 7.28e-07, "loss": 4.1771, "step": 5168 }, { "epoch": 136.0, "eval_accuracy": 0.38391984359726294, "eval_loss": 4.305095195770264, "eval_runtime": 0.6123, "eval_samples_per_second": 6.533, "eval_steps_per_second": 1.633, "step": 5168 }, { "epoch": 137.0, "learning_rate": 7.259999999999999e-07, "loss": 4.1744, "step": 5206 }, { "epoch": 137.0, "eval_accuracy": 0.38465298142717497, "eval_loss": 4.301632881164551, "eval_runtime": 1.3483, "eval_samples_per_second": 2.967, "eval_steps_per_second": 0.742, "step": 5206 }, { "epoch": 138.0, "learning_rate": 7.24e-07, "loss": 4.1717, "step": 5244 }, { "epoch": 138.0, "eval_accuracy": 0.38465298142717497, "eval_loss": 4.297549247741699, "eval_runtime": 0.6216, "eval_samples_per_second": 6.435, "eval_steps_per_second": 1.609, "step": 5244 }, { "epoch": 139.0, "learning_rate": 7.219999999999999e-07, "loss": 4.1616, "step": 5282 }, { "epoch": 139.0, "eval_accuracy": 0.38465298142717497, "eval_loss": 4.296638488769531, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 5282 }, { "epoch": 140.0, "learning_rate": 7.2e-07, "loss": 4.1582, "step": 5320 }, { "epoch": 140.0, "eval_accuracy": 0.38465298142717497, "eval_loss": 4.29475212097168, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 5320 }, { "epoch": 141.0, "learning_rate": 7.179999999999999e-07, "loss": 4.1583, "step": 5358 }, { "epoch": 141.0, "eval_accuracy": 0.3848973607038123, "eval_loss": 4.293056488037109, "eval_runtime": 3.2385, "eval_samples_per_second": 1.235, "eval_steps_per_second": 0.309, "step": 5358 }, { "epoch": 142.0, "learning_rate": 7.159999999999999e-07, "loss": 4.148, "step": 5396 }, { "epoch": 142.0, "eval_accuracy": 0.385386119257087, "eval_loss": 4.289401054382324, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 5396 }, { "epoch": 143.0, "learning_rate": 7.14e-07, "loss": 4.1417, "step": 5434 }, { "epoch": 143.0, "eval_accuracy": 0.3848973607038123, "eval_loss": 4.286114692687988, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 5434 }, { "epoch": 144.0, "learning_rate": 7.119999999999999e-07, "loss": 4.1386, "step": 5472 }, { "epoch": 144.0, "eval_accuracy": 0.386119257086999, "eval_loss": 4.286536693572998, "eval_runtime": 0.6139, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 5472 }, { "epoch": 145.0, "learning_rate": 7.1e-07, "loss": 4.133, "step": 5510 }, { "epoch": 145.0, "eval_accuracy": 0.386119257086999, "eval_loss": 4.283446311950684, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 5510 }, { "epoch": 146.0, "learning_rate": 7.079999999999999e-07, "loss": 4.129, "step": 5548 }, { "epoch": 146.0, "eval_accuracy": 0.38636363636363635, "eval_loss": 4.279318332672119, "eval_runtime": 0.6163, "eval_samples_per_second": 6.491, "eval_steps_per_second": 1.623, "step": 5548 }, { "epoch": 147.0, "learning_rate": 7.059999999999999e-07, "loss": 4.12, "step": 5586 }, { "epoch": 147.0, "eval_accuracy": 0.386119257086999, "eval_loss": 4.278520584106445, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 5586 }, { "epoch": 148.0, "learning_rate": 7.04e-07, "loss": 4.1206, "step": 5624 }, { "epoch": 148.0, "eval_accuracy": 0.38636363636363635, "eval_loss": 4.274984836578369, "eval_runtime": 0.6214, "eval_samples_per_second": 6.437, "eval_steps_per_second": 1.609, "step": 5624 }, { "epoch": 149.0, "learning_rate": 7.019999999999999e-07, "loss": 4.1226, "step": 5662 }, { "epoch": 149.0, "eval_accuracy": 0.3870967741935484, "eval_loss": 4.274369716644287, "eval_runtime": 0.6123, "eval_samples_per_second": 6.533, "eval_steps_per_second": 1.633, "step": 5662 }, { "epoch": 150.0, "learning_rate": 7e-07, "loss": 4.1104, "step": 5700 }, { "epoch": 150.0, "eval_accuracy": 0.3866080156402737, "eval_loss": 4.272345066070557, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 5700 }, { "epoch": 151.0, "learning_rate": 6.979999999999999e-07, "loss": 4.1093, "step": 5738 }, { "epoch": 151.0, "eval_accuracy": 0.3870967741935484, "eval_loss": 4.267661094665527, "eval_runtime": 0.6146, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 5738 }, { "epoch": 152.0, "learning_rate": 6.959999999999999e-07, "loss": 4.0989, "step": 5776 }, { "epoch": 152.0, "eval_accuracy": 0.38685239491691104, "eval_loss": 4.265379428863525, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 5776 }, { "epoch": 153.0, "learning_rate": 6.939999999999999e-07, "loss": 4.1035, "step": 5814 }, { "epoch": 153.0, "eval_accuracy": 0.3878299120234604, "eval_loss": 4.264577865600586, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 5814 }, { "epoch": 154.0, "learning_rate": 6.919999999999999e-07, "loss": 4.0949, "step": 5852 }, { "epoch": 154.0, "eval_accuracy": 0.38807429130009774, "eval_loss": 4.263481616973877, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 5852 }, { "epoch": 155.0, "learning_rate": 6.9e-07, "loss": 4.0921, "step": 5890 }, { "epoch": 155.0, "eval_accuracy": 0.3883186705767351, "eval_loss": 4.260597229003906, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 5890 }, { "epoch": 156.0, "learning_rate": 6.879999999999999e-07, "loss": 4.0883, "step": 5928 }, { "epoch": 156.0, "eval_accuracy": 0.3885630498533724, "eval_loss": 4.256484508514404, "eval_runtime": 0.6125, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 5928 }, { "epoch": 157.0, "learning_rate": 6.86e-07, "loss": 4.0794, "step": 5966 }, { "epoch": 157.0, "eval_accuracy": 0.38929618768328444, "eval_loss": 4.25582218170166, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 5966 }, { "epoch": 158.0, "learning_rate": 6.84e-07, "loss": 4.0754, "step": 6004 }, { "epoch": 158.0, "eval_accuracy": 0.38880742913000976, "eval_loss": 4.2530412673950195, "eval_runtime": 0.6131, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 6004 }, { "epoch": 159.0, "learning_rate": 6.82e-07, "loss": 4.0756, "step": 6042 }, { "epoch": 159.0, "eval_accuracy": 0.38929618768328444, "eval_loss": 4.249640464782715, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 6042 }, { "epoch": 160.0, "learning_rate": 6.800000000000001e-07, "loss": 4.067, "step": 6080 }, { "epoch": 160.0, "eval_accuracy": 0.38880742913000976, "eval_loss": 4.250114917755127, "eval_runtime": 0.6115, "eval_samples_per_second": 6.541, "eval_steps_per_second": 1.635, "step": 6080 }, { "epoch": 161.0, "learning_rate": 6.78e-07, "loss": 4.0627, "step": 6118 }, { "epoch": 161.0, "eval_accuracy": 0.3890518084066471, "eval_loss": 4.24841833114624, "eval_runtime": 0.6136, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 6118 }, { "epoch": 162.0, "learning_rate": 6.76e-07, "loss": 4.0586, "step": 6156 }, { "epoch": 162.0, "eval_accuracy": 0.3897849462365591, "eval_loss": 4.243945121765137, "eval_runtime": 0.6246, "eval_samples_per_second": 6.404, "eval_steps_per_second": 1.601, "step": 6156 }, { "epoch": 163.0, "learning_rate": 6.74e-07, "loss": 4.0577, "step": 6194 }, { "epoch": 163.0, "eval_accuracy": 0.38929618768328444, "eval_loss": 4.243143081665039, "eval_runtime": 0.6128, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 6194 }, { "epoch": 164.0, "learning_rate": 6.72e-07, "loss": 4.055, "step": 6232 }, { "epoch": 164.0, "eval_accuracy": 0.3895405669599218, "eval_loss": 4.239078044891357, "eval_runtime": 0.6128, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 6232 }, { "epoch": 165.0, "learning_rate": 6.7e-07, "loss": 4.0419, "step": 6270 }, { "epoch": 165.0, "eval_accuracy": 0.3895405669599218, "eval_loss": 4.239559650421143, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 6270 }, { "epoch": 166.0, "learning_rate": 6.68e-07, "loss": 4.0411, "step": 6308 }, { "epoch": 166.0, "eval_accuracy": 0.3902737047898338, "eval_loss": 4.236454486846924, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 6308 }, { "epoch": 167.0, "learning_rate": 6.66e-07, "loss": 4.0405, "step": 6346 }, { "epoch": 167.0, "eval_accuracy": 0.3907624633431085, "eval_loss": 4.235616683959961, "eval_runtime": 0.6129, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 6346 }, { "epoch": 168.0, "learning_rate": 6.64e-07, "loss": 4.0327, "step": 6384 }, { "epoch": 168.0, "eval_accuracy": 0.39051808406647115, "eval_loss": 4.234899044036865, "eval_runtime": 0.6121, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.634, "step": 6384 }, { "epoch": 169.0, "learning_rate": 6.62e-07, "loss": 4.0262, "step": 6422 }, { "epoch": 169.0, "eval_accuracy": 0.3912512218963832, "eval_loss": 4.231151580810547, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.626, "step": 6422 }, { "epoch": 170.0, "learning_rate": 6.6e-07, "loss": 4.0252, "step": 6460 }, { "epoch": 170.0, "eval_accuracy": 0.3912512218963832, "eval_loss": 4.230025291442871, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 6460 }, { "epoch": 171.0, "learning_rate": 6.58e-07, "loss": 4.0237, "step": 6498 }, { "epoch": 171.0, "eval_accuracy": 0.3914956011730205, "eval_loss": 4.225388526916504, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 6498 }, { "epoch": 172.0, "learning_rate": 6.56e-07, "loss": 4.024, "step": 6536 }, { "epoch": 172.0, "eval_accuracy": 0.3919843597262952, "eval_loss": 4.224780082702637, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 6536 }, { "epoch": 173.0, "learning_rate": 6.54e-07, "loss": 4.0137, "step": 6574 }, { "epoch": 173.0, "eval_accuracy": 0.39222873900293254, "eval_loss": 4.221837997436523, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 6574 }, { "epoch": 174.0, "learning_rate": 6.52e-07, "loss": 4.0108, "step": 6612 }, { "epoch": 174.0, "eval_accuracy": 0.3927174975562072, "eval_loss": 4.222439765930176, "eval_runtime": 0.6168, "eval_samples_per_second": 6.485, "eval_steps_per_second": 1.621, "step": 6612 }, { "epoch": 175.0, "learning_rate": 6.5e-07, "loss": 4.0037, "step": 6650 }, { "epoch": 175.0, "eval_accuracy": 0.3939393939393939, "eval_loss": 4.219006538391113, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 6650 }, { "epoch": 176.0, "learning_rate": 6.48e-07, "loss": 4.0021, "step": 6688 }, { "epoch": 176.0, "eval_accuracy": 0.3936950146627566, "eval_loss": 4.218034267425537, "eval_runtime": 0.6156, "eval_samples_per_second": 6.498, "eval_steps_per_second": 1.625, "step": 6688 }, { "epoch": 177.0, "learning_rate": 6.46e-07, "loss": 3.9949, "step": 6726 }, { "epoch": 177.0, "eval_accuracy": 0.39418377321603126, "eval_loss": 4.215020656585693, "eval_runtime": 0.6221, "eval_samples_per_second": 6.43, "eval_steps_per_second": 1.607, "step": 6726 }, { "epoch": 178.0, "learning_rate": 6.44e-07, "loss": 3.9957, "step": 6764 }, { "epoch": 178.0, "eval_accuracy": 0.3939393939393939, "eval_loss": 4.213464260101318, "eval_runtime": 0.6127, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 6764 }, { "epoch": 179.0, "learning_rate": 6.42e-07, "loss": 3.9923, "step": 6802 }, { "epoch": 179.0, "eval_accuracy": 0.39418377321603126, "eval_loss": 4.209378242492676, "eval_runtime": 0.6122, "eval_samples_per_second": 6.534, "eval_steps_per_second": 1.634, "step": 6802 }, { "epoch": 180.0, "learning_rate": 6.4e-07, "loss": 3.9853, "step": 6840 }, { "epoch": 180.0, "eval_accuracy": 0.3949169110459433, "eval_loss": 4.209150314331055, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 6840 }, { "epoch": 181.0, "learning_rate": 6.38e-07, "loss": 3.9779, "step": 6878 }, { "epoch": 181.0, "eval_accuracy": 0.3949169110459433, "eval_loss": 4.2085700035095215, "eval_runtime": 0.6125, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 6878 }, { "epoch": 182.0, "learning_rate": 6.36e-07, "loss": 3.9826, "step": 6916 }, { "epoch": 182.0, "eval_accuracy": 0.39467253176930595, "eval_loss": 4.204543590545654, "eval_runtime": 0.6126, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.632, "step": 6916 }, { "epoch": 183.0, "learning_rate": 6.34e-07, "loss": 3.9775, "step": 6954 }, { "epoch": 183.0, "eval_accuracy": 0.3949169110459433, "eval_loss": 4.201192855834961, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 6954 }, { "epoch": 184.0, "learning_rate": 6.319999999999999e-07, "loss": 3.9706, "step": 6992 }, { "epoch": 184.0, "eval_accuracy": 0.39613880742913, "eval_loss": 4.200508117675781, "eval_runtime": 0.6124, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 6992 }, { "epoch": 185.0, "learning_rate": 6.3e-07, "loss": 3.9672, "step": 7030 }, { "epoch": 185.0, "eval_accuracy": 0.3956500488758553, "eval_loss": 4.19916296005249, "eval_runtime": 0.6242, "eval_samples_per_second": 6.408, "eval_steps_per_second": 1.602, "step": 7030 }, { "epoch": 186.0, "learning_rate": 6.28e-07, "loss": 3.9707, "step": 7068 }, { "epoch": 186.0, "eval_accuracy": 0.3966275659824047, "eval_loss": 4.196375370025635, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 7068 }, { "epoch": 187.0, "learning_rate": 6.26e-07, "loss": 3.9585, "step": 7106 }, { "epoch": 187.0, "eval_accuracy": 0.39711632453567935, "eval_loss": 4.195079326629639, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 7106 }, { "epoch": 188.0, "learning_rate": 6.24e-07, "loss": 3.9552, "step": 7144 }, { "epoch": 188.0, "eval_accuracy": 0.3966275659824047, "eval_loss": 4.192666530609131, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 7144 }, { "epoch": 189.0, "learning_rate": 6.219999999999999e-07, "loss": 3.9526, "step": 7182 }, { "epoch": 189.0, "eval_accuracy": 0.3966275659824047, "eval_loss": 4.1922197341918945, "eval_runtime": 0.6118, "eval_samples_per_second": 6.538, "eval_steps_per_second": 1.635, "step": 7182 }, { "epoch": 190.0, "learning_rate": 6.2e-07, "loss": 3.9514, "step": 7220 }, { "epoch": 190.0, "eval_accuracy": 0.396871945259042, "eval_loss": 4.18861722946167, "eval_runtime": 0.6118, "eval_samples_per_second": 6.538, "eval_steps_per_second": 1.635, "step": 7220 }, { "epoch": 191.0, "learning_rate": 6.18e-07, "loss": 3.9464, "step": 7258 }, { "epoch": 191.0, "eval_accuracy": 0.39760508308895404, "eval_loss": 4.188557147979736, "eval_runtime": 0.667, "eval_samples_per_second": 5.997, "eval_steps_per_second": 1.499, "step": 7258 }, { "epoch": 192.0, "learning_rate": 6.16e-07, "loss": 3.9433, "step": 7296 }, { "epoch": 192.0, "eval_accuracy": 0.3980938416422287, "eval_loss": 4.185554504394531, "eval_runtime": 0.6187, "eval_samples_per_second": 6.466, "eval_steps_per_second": 1.616, "step": 7296 }, { "epoch": 193.0, "learning_rate": 6.14e-07, "loss": 3.9378, "step": 7334 }, { "epoch": 193.0, "eval_accuracy": 0.3978494623655914, "eval_loss": 4.184579372406006, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 7334 }, { "epoch": 194.0, "learning_rate": 6.119999999999999e-07, "loss": 3.9362, "step": 7372 }, { "epoch": 194.0, "eval_accuracy": 0.3980938416422287, "eval_loss": 4.1830949783325195, "eval_runtime": 0.6132, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 7372 }, { "epoch": 195.0, "learning_rate": 6.1e-07, "loss": 3.9307, "step": 7410 }, { "epoch": 195.0, "eval_accuracy": 0.3980938416422287, "eval_loss": 4.182034969329834, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 7410 }, { "epoch": 196.0, "learning_rate": 6.079999999999999e-07, "loss": 3.9324, "step": 7448 }, { "epoch": 196.0, "eval_accuracy": 0.3978494623655914, "eval_loss": 4.176692485809326, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 7448 }, { "epoch": 197.0, "learning_rate": 6.06e-07, "loss": 3.9223, "step": 7486 }, { "epoch": 197.0, "eval_accuracy": 0.39833822091886606, "eval_loss": 4.179370403289795, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 7486 }, { "epoch": 198.0, "learning_rate": 6.04e-07, "loss": 3.9279, "step": 7524 }, { "epoch": 198.0, "eval_accuracy": 0.3985826001955034, "eval_loss": 4.1752119064331055, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 7524 }, { "epoch": 199.0, "learning_rate": 6.019999999999999e-07, "loss": 3.9214, "step": 7562 }, { "epoch": 199.0, "eval_accuracy": 0.3980938416422287, "eval_loss": 4.172707557678223, "eval_runtime": 0.6174, "eval_samples_per_second": 6.479, "eval_steps_per_second": 1.62, "step": 7562 }, { "epoch": 200.0, "learning_rate": 6e-07, "loss": 3.9122, "step": 7600 }, { "epoch": 200.0, "eval_accuracy": 0.39882697947214074, "eval_loss": 4.174560070037842, "eval_runtime": 0.7746, "eval_samples_per_second": 5.164, "eval_steps_per_second": 1.291, "step": 7600 }, { "epoch": 201.0, "learning_rate": 5.979999999999999e-07, "loss": 3.9099, "step": 7638 }, { "epoch": 201.0, "eval_accuracy": 0.39956011730205276, "eval_loss": 4.169778823852539, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 7638 }, { "epoch": 202.0, "learning_rate": 5.96e-07, "loss": 3.9075, "step": 7676 }, { "epoch": 202.0, "eval_accuracy": 0.3993157380254154, "eval_loss": 4.169203758239746, "eval_runtime": 0.6199, "eval_samples_per_second": 6.452, "eval_steps_per_second": 1.613, "step": 7676 }, { "epoch": 203.0, "learning_rate": 5.939999999999999e-07, "loss": 3.9095, "step": 7714 }, { "epoch": 203.0, "eval_accuracy": 0.40004887585532745, "eval_loss": 4.16612434387207, "eval_runtime": 0.615, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 7714 }, { "epoch": 204.0, "learning_rate": 5.919999999999999e-07, "loss": 3.9, "step": 7752 }, { "epoch": 204.0, "eval_accuracy": 0.40078201368523947, "eval_loss": 4.163661956787109, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 7752 }, { "epoch": 205.0, "learning_rate": 5.9e-07, "loss": 3.9004, "step": 7790 }, { "epoch": 205.0, "eval_accuracy": 0.4002932551319648, "eval_loss": 4.161859512329102, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 7790 }, { "epoch": 206.0, "learning_rate": 5.879999999999999e-07, "loss": 3.8978, "step": 7828 }, { "epoch": 206.0, "eval_accuracy": 0.40053763440860213, "eval_loss": 4.160345554351807, "eval_runtime": 0.6636, "eval_samples_per_second": 6.028, "eval_steps_per_second": 1.507, "step": 7828 }, { "epoch": 207.0, "learning_rate": 5.86e-07, "loss": 3.8918, "step": 7866 }, { "epoch": 207.0, "eval_accuracy": 0.40053763440860213, "eval_loss": 4.158294677734375, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 7866 }, { "epoch": 208.0, "learning_rate": 5.839999999999999e-07, "loss": 3.8848, "step": 7904 }, { "epoch": 208.0, "eval_accuracy": 0.40078201368523947, "eval_loss": 4.158019542694092, "eval_runtime": 0.6145, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 7904 }, { "epoch": 209.0, "learning_rate": 5.819999999999999e-07, "loss": 3.8831, "step": 7942 }, { "epoch": 209.0, "eval_accuracy": 0.40004887585532745, "eval_loss": 4.1576619148254395, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 7942 }, { "epoch": 210.0, "learning_rate": 5.8e-07, "loss": 3.8821, "step": 7980 }, { "epoch": 210.0, "eval_accuracy": 0.40053763440860213, "eval_loss": 4.154994487762451, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 7980 }, { "epoch": 211.0, "learning_rate": 5.779999999999999e-07, "loss": 3.8818, "step": 8018 }, { "epoch": 211.0, "eval_accuracy": 0.40078201368523947, "eval_loss": 4.152185440063477, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 8018 }, { "epoch": 212.0, "learning_rate": 5.76e-07, "loss": 3.8764, "step": 8056 }, { "epoch": 212.0, "eval_accuracy": 0.40078201368523947, "eval_loss": 4.152061462402344, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 8056 }, { "epoch": 213.0, "learning_rate": 5.739999999999999e-07, "loss": 3.8704, "step": 8094 }, { "epoch": 213.0, "eval_accuracy": 0.4010263929618768, "eval_loss": 4.14907693862915, "eval_runtime": 0.6221, "eval_samples_per_second": 6.43, "eval_steps_per_second": 1.607, "step": 8094 }, { "epoch": 214.0, "learning_rate": 5.719999999999999e-07, "loss": 3.8725, "step": 8132 }, { "epoch": 214.0, "eval_accuracy": 0.4010263929618768, "eval_loss": 4.149218559265137, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 8132 }, { "epoch": 215.0, "learning_rate": 5.699999999999999e-07, "loss": 3.8698, "step": 8170 }, { "epoch": 215.0, "eval_accuracy": 0.4010263929618768, "eval_loss": 4.146964073181152, "eval_runtime": 0.6139, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 8170 }, { "epoch": 216.0, "learning_rate": 5.679999999999999e-07, "loss": 3.8654, "step": 8208 }, { "epoch": 216.0, "eval_accuracy": 0.40175953079178883, "eval_loss": 4.146454811096191, "eval_runtime": 0.6121, "eval_samples_per_second": 6.535, "eval_steps_per_second": 1.634, "step": 8208 }, { "epoch": 217.0, "learning_rate": 5.66e-07, "loss": 3.8608, "step": 8246 }, { "epoch": 217.0, "eval_accuracy": 0.4020039100684262, "eval_loss": 4.145140171051025, "eval_runtime": 0.6127, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 8246 }, { "epoch": 218.0, "learning_rate": 5.639999999999999e-07, "loss": 3.8584, "step": 8284 }, { "epoch": 218.0, "eval_accuracy": 0.4015151515151515, "eval_loss": 4.142205715179443, "eval_runtime": 0.6251, "eval_samples_per_second": 6.399, "eval_steps_per_second": 1.6, "step": 8284 }, { "epoch": 219.0, "learning_rate": 5.620000000000001e-07, "loss": 3.8546, "step": 8322 }, { "epoch": 219.0, "eval_accuracy": 0.40249266862170086, "eval_loss": 4.1411662101745605, "eval_runtime": 0.6119, "eval_samples_per_second": 6.537, "eval_steps_per_second": 1.634, "step": 8322 }, { "epoch": 220.0, "learning_rate": 5.6e-07, "loss": 3.8494, "step": 8360 }, { "epoch": 220.0, "eval_accuracy": 0.4022482893450635, "eval_loss": 4.140811920166016, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 8360 }, { "epoch": 221.0, "learning_rate": 5.58e-07, "loss": 3.8479, "step": 8398 }, { "epoch": 221.0, "eval_accuracy": 0.40249266862170086, "eval_loss": 4.13836145401001, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 8398 }, { "epoch": 222.0, "learning_rate": 5.560000000000001e-07, "loss": 3.8463, "step": 8436 }, { "epoch": 222.0, "eval_accuracy": 0.40249266862170086, "eval_loss": 4.136462688446045, "eval_runtime": 0.6138, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 8436 }, { "epoch": 223.0, "learning_rate": 5.54e-07, "loss": 3.8422, "step": 8474 }, { "epoch": 223.0, "eval_accuracy": 0.40298142717497554, "eval_loss": 4.1326165199279785, "eval_runtime": 0.6246, "eval_samples_per_second": 6.404, "eval_steps_per_second": 1.601, "step": 8474 }, { "epoch": 224.0, "learning_rate": 5.520000000000001e-07, "loss": 3.8395, "step": 8512 }, { "epoch": 224.0, "eval_accuracy": 0.4022482893450635, "eval_loss": 4.133283615112305, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 8512 }, { "epoch": 225.0, "learning_rate": 5.5e-07, "loss": 3.8369, "step": 8550 }, { "epoch": 225.0, "eval_accuracy": 0.4034701857282502, "eval_loss": 4.133824825286865, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 8550 }, { "epoch": 226.0, "learning_rate": 5.48e-07, "loss": 3.8357, "step": 8588 }, { "epoch": 226.0, "eval_accuracy": 0.4046920821114369, "eval_loss": 4.129902362823486, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 8588 }, { "epoch": 227.0, "learning_rate": 5.46e-07, "loss": 3.8318, "step": 8626 }, { "epoch": 227.0, "eval_accuracy": 0.40420332355816224, "eval_loss": 4.129788398742676, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 8626 }, { "epoch": 228.0, "learning_rate": 5.44e-07, "loss": 3.8258, "step": 8664 }, { "epoch": 228.0, "eval_accuracy": 0.4039589442815249, "eval_loss": 4.129807472229004, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 8664 }, { "epoch": 229.0, "learning_rate": 5.420000000000001e-07, "loss": 3.8265, "step": 8702 }, { "epoch": 229.0, "eval_accuracy": 0.4044477028347996, "eval_loss": 4.127597332000732, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 8702 }, { "epoch": 230.0, "learning_rate": 5.4e-07, "loss": 3.8229, "step": 8740 }, { "epoch": 230.0, "eval_accuracy": 0.40420332355816224, "eval_loss": 4.126589298248291, "eval_runtime": 0.6331, "eval_samples_per_second": 6.318, "eval_steps_per_second": 1.58, "step": 8740 }, { "epoch": 231.0, "learning_rate": 5.38e-07, "loss": 3.8139, "step": 8778 }, { "epoch": 231.0, "eval_accuracy": 0.40420332355816224, "eval_loss": 4.125330448150635, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 8778 }, { "epoch": 232.0, "learning_rate": 5.36e-07, "loss": 3.8132, "step": 8816 }, { "epoch": 232.0, "eval_accuracy": 0.4046920821114369, "eval_loss": 4.1250810623168945, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 8816 }, { "epoch": 233.0, "learning_rate": 5.34e-07, "loss": 3.8126, "step": 8854 }, { "epoch": 233.0, "eval_accuracy": 0.4046920821114369, "eval_loss": 4.122879505157471, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 8854 }, { "epoch": 234.0, "learning_rate": 5.32e-07, "loss": 3.8074, "step": 8892 }, { "epoch": 234.0, "eval_accuracy": 0.40640273704789837, "eval_loss": 4.121622085571289, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 8892 }, { "epoch": 235.0, "learning_rate": 5.3e-07, "loss": 3.8072, "step": 8930 }, { "epoch": 235.0, "eval_accuracy": 0.4066471163245357, "eval_loss": 4.121754169464111, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.63, "step": 8930 }, { "epoch": 236.0, "learning_rate": 5.28e-07, "loss": 3.8056, "step": 8968 }, { "epoch": 236.0, "eval_accuracy": 0.4066471163245357, "eval_loss": 4.116854667663574, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 8968 }, { "epoch": 237.0, "learning_rate": 5.26e-07, "loss": 3.8038, "step": 9006 }, { "epoch": 237.0, "eval_accuracy": 0.4066471163245357, "eval_loss": 4.116855621337891, "eval_runtime": 0.6128, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 9006 }, { "epoch": 238.0, "learning_rate": 5.24e-07, "loss": 3.8025, "step": 9044 }, { "epoch": 238.0, "eval_accuracy": 0.4066471163245357, "eval_loss": 4.115084648132324, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 9044 }, { "epoch": 239.0, "learning_rate": 5.22e-07, "loss": 3.7948, "step": 9082 }, { "epoch": 239.0, "eval_accuracy": 0.40689149560117305, "eval_loss": 4.11461877822876, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.63, "step": 9082 }, { "epoch": 240.0, "learning_rate": 5.2e-07, "loss": 3.7929, "step": 9120 }, { "epoch": 240.0, "eval_accuracy": 0.4066471163245357, "eval_loss": 4.1119794845581055, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 9120 }, { "epoch": 241.0, "learning_rate": 5.18e-07, "loss": 3.7922, "step": 9158 }, { "epoch": 241.0, "eval_accuracy": 0.40689149560117305, "eval_loss": 4.111790180206299, "eval_runtime": 0.6215, "eval_samples_per_second": 6.436, "eval_steps_per_second": 1.609, "step": 9158 }, { "epoch": 242.0, "learning_rate": 5.16e-07, "loss": 3.7897, "step": 9196 }, { "epoch": 242.0, "eval_accuracy": 0.40762463343108507, "eval_loss": 4.109217166900635, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 9196 }, { "epoch": 243.0, "learning_rate": 5.14e-07, "loss": 3.7877, "step": 9234 }, { "epoch": 243.0, "eval_accuracy": 0.4078690127077224, "eval_loss": 4.107990741729736, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 9234 }, { "epoch": 244.0, "learning_rate": 5.12e-07, "loss": 3.7829, "step": 9272 }, { "epoch": 244.0, "eval_accuracy": 0.4071358748778104, "eval_loss": 4.1082682609558105, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 9272 }, { "epoch": 245.0, "learning_rate": 5.1e-07, "loss": 3.7814, "step": 9310 }, { "epoch": 245.0, "eval_accuracy": 0.40762463343108507, "eval_loss": 4.108653545379639, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 9310 }, { "epoch": 246.0, "learning_rate": 5.079999999999999e-07, "loss": 3.781, "step": 9348 }, { "epoch": 246.0, "eval_accuracy": 0.4071358748778104, "eval_loss": 4.1042561531066895, "eval_runtime": 0.6137, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 9348 }, { "epoch": 247.0, "learning_rate": 5.06e-07, "loss": 3.7728, "step": 9386 }, { "epoch": 247.0, "eval_accuracy": 0.40811339198435975, "eval_loss": 4.102220058441162, "eval_runtime": 0.6137, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 9386 }, { "epoch": 248.0, "learning_rate": 5.04e-07, "loss": 3.779, "step": 9424 }, { "epoch": 248.0, "eval_accuracy": 0.40811339198435975, "eval_loss": 4.101465225219727, "eval_runtime": 0.6127, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.632, "step": 9424 }, { "epoch": 249.0, "learning_rate": 5.02e-07, "loss": 3.7716, "step": 9462 }, { "epoch": 249.0, "eval_accuracy": 0.4078690127077224, "eval_loss": 4.103041172027588, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 9462 }, { "epoch": 250.0, "learning_rate": 5e-07, "loss": 3.7674, "step": 9500 }, { "epoch": 250.0, "eval_accuracy": 0.4078690127077224, "eval_loss": 4.099481105804443, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 9500 }, { "epoch": 251.0, "learning_rate": 4.979999999999999e-07, "loss": 3.7665, "step": 9538 }, { "epoch": 251.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.0990800857543945, "eval_runtime": 0.6218, "eval_samples_per_second": 6.433, "eval_steps_per_second": 1.608, "step": 9538 }, { "epoch": 252.0, "learning_rate": 4.96e-07, "loss": 3.7603, "step": 9576 }, { "epoch": 252.0, "eval_accuracy": 0.40738025415444773, "eval_loss": 4.100230693817139, "eval_runtime": 0.6144, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 9576 }, { "epoch": 253.0, "learning_rate": 4.94e-07, "loss": 3.7645, "step": 9614 }, { "epoch": 253.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.095699787139893, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 9614 }, { "epoch": 254.0, "learning_rate": 4.92e-07, "loss": 3.7622, "step": 9652 }, { "epoch": 254.0, "eval_accuracy": 0.4083577712609971, "eval_loss": 4.0959062576293945, "eval_runtime": 2.189, "eval_samples_per_second": 1.827, "eval_steps_per_second": 0.457, "step": 9652 }, { "epoch": 255.0, "learning_rate": 4.9e-07, "loss": 3.7583, "step": 9690 }, { "epoch": 255.0, "eval_accuracy": 0.4083577712609971, "eval_loss": 4.0954976081848145, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 9690 }, { "epoch": 256.0, "learning_rate": 4.879999999999999e-07, "loss": 3.752, "step": 9728 }, { "epoch": 256.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.0929741859436035, "eval_runtime": 0.6139, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 9728 }, { "epoch": 257.0, "learning_rate": 4.86e-07, "loss": 3.7545, "step": 9766 }, { "epoch": 257.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.0912184715271, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 9766 }, { "epoch": 258.0, "learning_rate": 4.839999999999999e-07, "loss": 3.7447, "step": 9804 }, { "epoch": 258.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.092291831970215, "eval_runtime": 0.6146, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 9804 }, { "epoch": 259.0, "learning_rate": 4.82e-07, "loss": 3.7483, "step": 9842 }, { "epoch": 259.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.089372158050537, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 9842 }, { "epoch": 260.0, "learning_rate": 4.8e-07, "loss": 3.7428, "step": 9880 }, { "epoch": 260.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.090963840484619, "eval_runtime": 0.6144, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 9880 }, { "epoch": 261.0, "learning_rate": 4.779999999999999e-07, "loss": 3.7407, "step": 9918 }, { "epoch": 261.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.087746620178223, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 9918 }, { "epoch": 262.0, "learning_rate": 4.76e-07, "loss": 3.7405, "step": 9956 }, { "epoch": 262.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.089057922363281, "eval_runtime": 0.6138, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 9956 }, { "epoch": 263.0, "learning_rate": 4.7399999999999993e-07, "loss": 3.7354, "step": 9994 }, { "epoch": 263.0, "eval_accuracy": 0.4088465298142718, "eval_loss": 4.0869574546813965, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 9994 }, { "epoch": 264.0, "learning_rate": 4.7199999999999994e-07, "loss": 3.7353, "step": 10032 }, { "epoch": 264.0, "eval_accuracy": 0.40860215053763443, "eval_loss": 4.085577487945557, "eval_runtime": 0.614, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 10032 }, { "epoch": 265.0, "learning_rate": 4.6999999999999995e-07, "loss": 3.7312, "step": 10070 }, { "epoch": 265.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.083754062652588, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 10070 }, { "epoch": 266.0, "learning_rate": 4.68e-07, "loss": 3.7313, "step": 10108 }, { "epoch": 266.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.082942485809326, "eval_runtime": 0.614, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 10108 }, { "epoch": 267.0, "learning_rate": 4.66e-07, "loss": 3.7264, "step": 10146 }, { "epoch": 267.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.0826802253723145, "eval_runtime": 0.6147, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 10146 }, { "epoch": 268.0, "learning_rate": 4.64e-07, "loss": 3.7221, "step": 10184 }, { "epoch": 268.0, "eval_accuracy": 0.40933528836754646, "eval_loss": 4.081498622894287, "eval_runtime": 0.6152, "eval_samples_per_second": 6.502, "eval_steps_per_second": 1.625, "step": 10184 }, { "epoch": 269.0, "learning_rate": 4.62e-07, "loss": 3.7211, "step": 10222 }, { "epoch": 269.0, "eval_accuracy": 0.4090909090909091, "eval_loss": 4.0801472663879395, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 10222 }, { "epoch": 270.0, "learning_rate": 4.6e-07, "loss": 3.7232, "step": 10260 }, { "epoch": 270.0, "eval_accuracy": 0.40933528836754646, "eval_loss": 4.0787458419799805, "eval_runtime": 0.6151, "eval_samples_per_second": 6.503, "eval_steps_per_second": 1.626, "step": 10260 }, { "epoch": 271.0, "learning_rate": 4.58e-07, "loss": 3.718, "step": 10298 }, { "epoch": 271.0, "eval_accuracy": 0.4100684261974585, "eval_loss": 4.07801628112793, "eval_runtime": 0.6249, "eval_samples_per_second": 6.401, "eval_steps_per_second": 1.6, "step": 10298 }, { "epoch": 272.0, "learning_rate": 4.56e-07, "loss": 3.7208, "step": 10336 }, { "epoch": 272.0, "eval_accuracy": 0.4108015640273705, "eval_loss": 4.077081203460693, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 10336 }, { "epoch": 273.0, "learning_rate": 4.54e-07, "loss": 3.7109, "step": 10374 }, { "epoch": 273.0, "eval_accuracy": 0.4115347018572825, "eval_loss": 4.07664155960083, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 10374 }, { "epoch": 274.0, "learning_rate": 4.5199999999999997e-07, "loss": 3.7146, "step": 10412 }, { "epoch": 274.0, "eval_accuracy": 0.41104594330400784, "eval_loss": 4.073920249938965, "eval_runtime": 0.626, "eval_samples_per_second": 6.39, "eval_steps_per_second": 1.597, "step": 10412 }, { "epoch": 275.0, "learning_rate": 4.5e-07, "loss": 3.7071, "step": 10450 }, { "epoch": 275.0, "eval_accuracy": 0.41177908113391987, "eval_loss": 4.073719501495361, "eval_runtime": 0.6143, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 10450 }, { "epoch": 276.0, "learning_rate": 4.48e-07, "loss": 3.7044, "step": 10488 }, { "epoch": 276.0, "eval_accuracy": 0.41226783968719455, "eval_loss": 4.074197769165039, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 10488 }, { "epoch": 277.0, "learning_rate": 4.46e-07, "loss": 3.7094, "step": 10526 }, { "epoch": 277.0, "eval_accuracy": 0.4125122189638319, "eval_loss": 4.071889400482178, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 10526 }, { "epoch": 278.0, "learning_rate": 4.44e-07, "loss": 3.7028, "step": 10564 }, { "epoch": 278.0, "eval_accuracy": 0.4120234604105572, "eval_loss": 4.071835994720459, "eval_runtime": 0.6231, "eval_samples_per_second": 6.419, "eval_steps_per_second": 1.605, "step": 10564 }, { "epoch": 279.0, "learning_rate": 4.4199999999999996e-07, "loss": 3.7051, "step": 10602 }, { "epoch": 279.0, "eval_accuracy": 0.4120234604105572, "eval_loss": 4.069863319396973, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.63, "step": 10602 }, { "epoch": 280.0, "learning_rate": 4.3999999999999997e-07, "loss": 3.7011, "step": 10640 }, { "epoch": 280.0, "eval_accuracy": 0.4125122189638319, "eval_loss": 4.068091869354248, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 10640 }, { "epoch": 281.0, "learning_rate": 4.38e-07, "loss": 3.6954, "step": 10678 }, { "epoch": 281.0, "eval_accuracy": 0.4120234604105572, "eval_loss": 4.066802501678467, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 10678 }, { "epoch": 282.0, "learning_rate": 4.36e-07, "loss": 3.6933, "step": 10716 }, { "epoch": 282.0, "eval_accuracy": 0.41226783968719455, "eval_loss": 4.066892623901367, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 10716 }, { "epoch": 283.0, "learning_rate": 4.34e-07, "loss": 3.6935, "step": 10754 }, { "epoch": 283.0, "eval_accuracy": 0.4125122189638319, "eval_loss": 4.063753128051758, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 10754 }, { "epoch": 284.0, "learning_rate": 4.3199999999999995e-07, "loss": 3.6867, "step": 10792 }, { "epoch": 284.0, "eval_accuracy": 0.4125122189638319, "eval_loss": 4.065001964569092, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 10792 }, { "epoch": 285.0, "learning_rate": 4.2999999999999996e-07, "loss": 3.6888, "step": 10830 }, { "epoch": 285.0, "eval_accuracy": 0.4120234604105572, "eval_loss": 4.0640668869018555, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 10830 }, { "epoch": 286.0, "learning_rate": 4.2799999999999997e-07, "loss": 3.6843, "step": 10868 }, { "epoch": 286.0, "eval_accuracy": 0.4115347018572825, "eval_loss": 4.0637993812561035, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 10868 }, { "epoch": 287.0, "learning_rate": 4.26e-07, "loss": 3.6824, "step": 10906 }, { "epoch": 287.0, "eval_accuracy": 0.4125122189638319, "eval_loss": 4.06214714050293, "eval_runtime": 0.6128, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 10906 }, { "epoch": 288.0, "learning_rate": 4.24e-07, "loss": 3.6821, "step": 10944 }, { "epoch": 288.0, "eval_accuracy": 0.41226783968719455, "eval_loss": 4.060315132141113, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 10944 }, { "epoch": 289.0, "learning_rate": 4.2199999999999994e-07, "loss": 3.6802, "step": 10982 }, { "epoch": 289.0, "eval_accuracy": 0.4125122189638319, "eval_loss": 4.062171459197998, "eval_runtime": 0.6138, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 10982 }, { "epoch": 290.0, "learning_rate": 4.1999999999999995e-07, "loss": 3.6789, "step": 11020 }, { "epoch": 290.0, "eval_accuracy": 0.41275659824046923, "eval_loss": 4.057875633239746, "eval_runtime": 0.6129, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 11020 }, { "epoch": 291.0, "learning_rate": 4.1799999999999996e-07, "loss": 3.6767, "step": 11058 }, { "epoch": 291.0, "eval_accuracy": 0.41300097751710657, "eval_loss": 4.057925701141357, "eval_runtime": 0.6126, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.632, "step": 11058 }, { "epoch": 292.0, "learning_rate": 4.1599999999999997e-07, "loss": 3.6751, "step": 11096 }, { "epoch": 292.0, "eval_accuracy": 0.4137341153470186, "eval_loss": 4.058208465576172, "eval_runtime": 0.6175, "eval_samples_per_second": 6.478, "eval_steps_per_second": 1.62, "step": 11096 }, { "epoch": 293.0, "learning_rate": 4.14e-07, "loss": 3.6726, "step": 11134 }, { "epoch": 293.0, "eval_accuracy": 0.4137341153470186, "eval_loss": 4.055559158325195, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 11134 }, { "epoch": 294.0, "learning_rate": 4.12e-07, "loss": 3.6704, "step": 11172 }, { "epoch": 294.0, "eval_accuracy": 0.4137341153470186, "eval_loss": 4.058291435241699, "eval_runtime": 0.6137, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 11172 }, { "epoch": 295.0, "learning_rate": 4.0999999999999994e-07, "loss": 3.6703, "step": 11210 }, { "epoch": 295.0, "eval_accuracy": 0.4142228739002933, "eval_loss": 4.055552005767822, "eval_runtime": 0.626, "eval_samples_per_second": 6.39, "eval_steps_per_second": 1.598, "step": 11210 }, { "epoch": 296.0, "learning_rate": 4.0799999999999995e-07, "loss": 3.6662, "step": 11248 }, { "epoch": 296.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.05183219909668, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 11248 }, { "epoch": 297.0, "learning_rate": 4.06e-07, "loss": 3.6643, "step": 11286 }, { "epoch": 297.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.05209493637085, "eval_runtime": 0.626, "eval_samples_per_second": 6.39, "eval_steps_per_second": 1.597, "step": 11286 }, { "epoch": 298.0, "learning_rate": 4.04e-07, "loss": 3.6623, "step": 11324 }, { "epoch": 298.0, "eval_accuracy": 0.4144672531769306, "eval_loss": 4.054409980773926, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 11324 }, { "epoch": 299.0, "learning_rate": 4.02e-07, "loss": 3.6626, "step": 11362 }, { "epoch": 299.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.051777362823486, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 11362 }, { "epoch": 300.0, "learning_rate": 4e-07, "loss": 3.661, "step": 11400 }, { "epoch": 300.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.049643516540527, "eval_runtime": 0.7928, "eval_samples_per_second": 5.046, "eval_steps_per_second": 1.261, "step": 11400 }, { "epoch": 301.0, "learning_rate": 3.98e-07, "loss": 3.6553, "step": 11438 }, { "epoch": 301.0, "eval_accuracy": 0.4149560117302053, "eval_loss": 4.048153400421143, "eval_runtime": 0.6123, "eval_samples_per_second": 6.533, "eval_steps_per_second": 1.633, "step": 11438 }, { "epoch": 302.0, "learning_rate": 3.96e-07, "loss": 3.6573, "step": 11476 }, { "epoch": 302.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.047247886657715, "eval_runtime": 0.6143, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 11476 }, { "epoch": 303.0, "learning_rate": 3.94e-07, "loss": 3.6548, "step": 11514 }, { "epoch": 303.0, "eval_accuracy": 0.41520039100684264, "eval_loss": 4.046008586883545, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 11514 }, { "epoch": 304.0, "learning_rate": 3.92e-07, "loss": 3.6531, "step": 11552 }, { "epoch": 304.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.046994209289551, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 11552 }, { "epoch": 305.0, "learning_rate": 3.8999999999999997e-07, "loss": 3.6549, "step": 11590 }, { "epoch": 305.0, "eval_accuracy": 0.4149560117302053, "eval_loss": 4.046128273010254, "eval_runtime": 0.6142, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 11590 }, { "epoch": 306.0, "learning_rate": 3.88e-07, "loss": 3.6485, "step": 11628 }, { "epoch": 306.0, "eval_accuracy": 0.41471163245356796, "eval_loss": 4.0460734367370605, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 11628 }, { "epoch": 307.0, "learning_rate": 3.86e-07, "loss": 3.6441, "step": 11666 }, { "epoch": 307.0, "eval_accuracy": 0.4149560117302053, "eval_loss": 4.046470642089844, "eval_runtime": 0.6145, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 11666 }, { "epoch": 308.0, "learning_rate": 3.84e-07, "loss": 3.6438, "step": 11704 }, { "epoch": 308.0, "eval_accuracy": 0.41593352883675466, "eval_loss": 4.042454719543457, "eval_runtime": 0.6142, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 11704 }, { "epoch": 309.0, "learning_rate": 3.82e-07, "loss": 3.6435, "step": 11742 }, { "epoch": 309.0, "eval_accuracy": 0.4156891495601173, "eval_loss": 4.040951251983643, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 11742 }, { "epoch": 310.0, "learning_rate": 3.7999999999999996e-07, "loss": 3.6397, "step": 11780 }, { "epoch": 310.0, "eval_accuracy": 0.41593352883675466, "eval_loss": 4.040650844573975, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 11780 }, { "epoch": 311.0, "learning_rate": 3.7799999999999997e-07, "loss": 3.6363, "step": 11818 }, { "epoch": 311.0, "eval_accuracy": 0.41544477028348, "eval_loss": 4.042422294616699, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 11818 }, { "epoch": 312.0, "learning_rate": 3.76e-07, "loss": 3.6315, "step": 11856 }, { "epoch": 312.0, "eval_accuracy": 0.41544477028348, "eval_loss": 4.043632984161377, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 11856 }, { "epoch": 313.0, "learning_rate": 3.74e-07, "loss": 3.6323, "step": 11894 }, { "epoch": 313.0, "eval_accuracy": 0.4156891495601173, "eval_loss": 4.040919303894043, "eval_runtime": 0.628, "eval_samples_per_second": 6.369, "eval_steps_per_second": 1.592, "step": 11894 }, { "epoch": 314.0, "learning_rate": 3.72e-07, "loss": 3.6386, "step": 11932 }, { "epoch": 314.0, "eval_accuracy": 0.4156891495601173, "eval_loss": 4.038565158843994, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 11932 }, { "epoch": 315.0, "learning_rate": 3.7e-07, "loss": 3.6303, "step": 11970 }, { "epoch": 315.0, "eval_accuracy": 0.41544477028348, "eval_loss": 4.0388689041137695, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 11970 }, { "epoch": 316.0, "learning_rate": 3.6799999999999996e-07, "loss": 3.6336, "step": 12008 }, { "epoch": 316.0, "eval_accuracy": 0.41642228739002934, "eval_loss": 4.039405345916748, "eval_runtime": 0.614, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 12008 }, { "epoch": 317.0, "learning_rate": 3.6599999999999997e-07, "loss": 3.6281, "step": 12046 }, { "epoch": 317.0, "eval_accuracy": 0.4166666666666667, "eval_loss": 4.038857460021973, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 12046 }, { "epoch": 318.0, "learning_rate": 3.64e-07, "loss": 3.6249, "step": 12084 }, { "epoch": 318.0, "eval_accuracy": 0.41764418377321605, "eval_loss": 4.037881374359131, "eval_runtime": 0.7782, "eval_samples_per_second": 5.14, "eval_steps_per_second": 1.285, "step": 12084 }, { "epoch": 319.0, "learning_rate": 3.62e-07, "loss": 3.6277, "step": 12122 }, { "epoch": 319.0, "eval_accuracy": 0.41764418377321605, "eval_loss": 4.037135601043701, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 12122 }, { "epoch": 320.0, "learning_rate": 3.6e-07, "loss": 3.6232, "step": 12160 }, { "epoch": 320.0, "eval_accuracy": 0.41715542521994137, "eval_loss": 4.035280704498291, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 12160 }, { "epoch": 321.0, "learning_rate": 3.5799999999999995e-07, "loss": 3.6177, "step": 12198 }, { "epoch": 321.0, "eval_accuracy": 0.41764418377321605, "eval_loss": 4.036287307739258, "eval_runtime": 0.6153, "eval_samples_per_second": 6.501, "eval_steps_per_second": 1.625, "step": 12198 }, { "epoch": 322.0, "learning_rate": 3.5599999999999996e-07, "loss": 3.626, "step": 12236 }, { "epoch": 322.0, "eval_accuracy": 0.4173998044965787, "eval_loss": 4.031866073608398, "eval_runtime": 0.6128, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 12236 }, { "epoch": 323.0, "learning_rate": 3.5399999999999997e-07, "loss": 3.6181, "step": 12274 }, { "epoch": 323.0, "eval_accuracy": 0.41715542521994137, "eval_loss": 4.031935691833496, "eval_runtime": 0.6146, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 12274 }, { "epoch": 324.0, "learning_rate": 3.52e-07, "loss": 3.6183, "step": 12312 }, { "epoch": 324.0, "eval_accuracy": 0.41764418377321605, "eval_loss": 4.03291130065918, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 12312 }, { "epoch": 325.0, "learning_rate": 3.5e-07, "loss": 3.6169, "step": 12350 }, { "epoch": 325.0, "eval_accuracy": 0.41764418377321605, "eval_loss": 4.032841682434082, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 12350 }, { "epoch": 326.0, "learning_rate": 3.4799999999999994e-07, "loss": 3.6094, "step": 12388 }, { "epoch": 326.0, "eval_accuracy": 0.4178885630498534, "eval_loss": 4.031832218170166, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 12388 }, { "epoch": 327.0, "learning_rate": 3.4599999999999995e-07, "loss": 3.6138, "step": 12426 }, { "epoch": 327.0, "eval_accuracy": 0.4178885630498534, "eval_loss": 4.029395580291748, "eval_runtime": 0.6125, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 12426 }, { "epoch": 328.0, "learning_rate": 3.4399999999999996e-07, "loss": 3.6101, "step": 12464 }, { "epoch": 328.0, "eval_accuracy": 0.41813294232649073, "eval_loss": 4.031092166900635, "eval_runtime": 0.6135, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 12464 }, { "epoch": 329.0, "learning_rate": 3.42e-07, "loss": 3.6062, "step": 12502 }, { "epoch": 329.0, "eval_accuracy": 0.41837732160312807, "eval_loss": 4.029919624328613, "eval_runtime": 0.6173, "eval_samples_per_second": 6.48, "eval_steps_per_second": 1.62, "step": 12502 }, { "epoch": 330.0, "learning_rate": 3.4000000000000003e-07, "loss": 3.6093, "step": 12540 }, { "epoch": 330.0, "eval_accuracy": 0.41813294232649073, "eval_loss": 4.027568817138672, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 12540 }, { "epoch": 331.0, "learning_rate": 3.38e-07, "loss": 3.6071, "step": 12578 }, { "epoch": 331.0, "eval_accuracy": 0.41813294232649073, "eval_loss": 4.030076503753662, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 12578 }, { "epoch": 332.0, "learning_rate": 3.36e-07, "loss": 3.6064, "step": 12616 }, { "epoch": 332.0, "eval_accuracy": 0.41837732160312807, "eval_loss": 4.027680397033691, "eval_runtime": 0.6131, "eval_samples_per_second": 6.524, "eval_steps_per_second": 1.631, "step": 12616 }, { "epoch": 333.0, "learning_rate": 3.34e-07, "loss": 3.5982, "step": 12654 }, { "epoch": 333.0, "eval_accuracy": 0.41837732160312807, "eval_loss": 4.028773784637451, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 12654 }, { "epoch": 334.0, "learning_rate": 3.32e-07, "loss": 3.6064, "step": 12692 }, { "epoch": 334.0, "eval_accuracy": 0.4178885630498534, "eval_loss": 4.0255818367004395, "eval_runtime": 0.6242, "eval_samples_per_second": 6.408, "eval_steps_per_second": 1.602, "step": 12692 }, { "epoch": 335.0, "learning_rate": 3.3e-07, "loss": 3.6023, "step": 12730 }, { "epoch": 335.0, "eval_accuracy": 0.41837732160312807, "eval_loss": 4.025238037109375, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 12730 }, { "epoch": 336.0, "learning_rate": 3.28e-07, "loss": 3.5992, "step": 12768 }, { "epoch": 336.0, "eval_accuracy": 0.4186217008797654, "eval_loss": 4.024014472961426, "eval_runtime": 0.6127, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.632, "step": 12768 }, { "epoch": 337.0, "learning_rate": 3.26e-07, "loss": 3.5997, "step": 12806 }, { "epoch": 337.0, "eval_accuracy": 0.41886608015640275, "eval_loss": 4.0236945152282715, "eval_runtime": 0.6138, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 12806 }, { "epoch": 338.0, "learning_rate": 3.24e-07, "loss": 3.5955, "step": 12844 }, { "epoch": 338.0, "eval_accuracy": 0.4186217008797654, "eval_loss": 4.02353048324585, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 12844 }, { "epoch": 339.0, "learning_rate": 3.22e-07, "loss": 3.5929, "step": 12882 }, { "epoch": 339.0, "eval_accuracy": 0.4186217008797654, "eval_loss": 4.023321151733398, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 12882 }, { "epoch": 340.0, "learning_rate": 3.2e-07, "loss": 3.5953, "step": 12920 }, { "epoch": 340.0, "eval_accuracy": 0.41886608015640275, "eval_loss": 4.020965099334717, "eval_runtime": 0.6126, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.632, "step": 12920 }, { "epoch": 341.0, "learning_rate": 3.18e-07, "loss": 3.5915, "step": 12958 }, { "epoch": 341.0, "eval_accuracy": 0.41837732160312807, "eval_loss": 4.020979404449463, "eval_runtime": 0.6256, "eval_samples_per_second": 6.394, "eval_steps_per_second": 1.598, "step": 12958 }, { "epoch": 342.0, "learning_rate": 3.1599999999999997e-07, "loss": 3.5835, "step": 12996 }, { "epoch": 342.0, "eval_accuracy": 0.41886608015640275, "eval_loss": 4.022586345672607, "eval_runtime": 0.6251, "eval_samples_per_second": 6.399, "eval_steps_per_second": 1.6, "step": 12996 }, { "epoch": 343.0, "learning_rate": 3.14e-07, "loss": 3.5852, "step": 13034 }, { "epoch": 343.0, "eval_accuracy": 0.41886608015640275, "eval_loss": 4.022684574127197, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 13034 }, { "epoch": 344.0, "learning_rate": 3.12e-07, "loss": 3.5894, "step": 13072 }, { "epoch": 344.0, "eval_accuracy": 0.4191104594330401, "eval_loss": 4.022200584411621, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 13072 }, { "epoch": 345.0, "learning_rate": 3.1e-07, "loss": 3.5864, "step": 13110 }, { "epoch": 345.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.022695541381836, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 13110 }, { "epoch": 346.0, "learning_rate": 3.08e-07, "loss": 3.5854, "step": 13148 }, { "epoch": 346.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.018957138061523, "eval_runtime": 0.6145, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627, "step": 13148 }, { "epoch": 347.0, "learning_rate": 3.0599999999999996e-07, "loss": 3.5841, "step": 13186 }, { "epoch": 347.0, "eval_accuracy": 0.4191104594330401, "eval_loss": 4.017984390258789, "eval_runtime": 0.6155, "eval_samples_per_second": 6.499, "eval_steps_per_second": 1.625, "step": 13186 }, { "epoch": 348.0, "learning_rate": 3.0399999999999997e-07, "loss": 3.5821, "step": 13224 }, { "epoch": 348.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.018927097320557, "eval_runtime": 0.6152, "eval_samples_per_second": 6.502, "eval_steps_per_second": 1.626, "step": 13224 }, { "epoch": 349.0, "learning_rate": 3.02e-07, "loss": 3.5823, "step": 13262 }, { "epoch": 349.0, "eval_accuracy": 0.4191104594330401, "eval_loss": 4.0175862312316895, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 13262 }, { "epoch": 350.0, "learning_rate": 3e-07, "loss": 3.5772, "step": 13300 }, { "epoch": 350.0, "eval_accuracy": 0.4191104594330401, "eval_loss": 4.016434669494629, "eval_runtime": 0.6139, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 13300 }, { "epoch": 351.0, "learning_rate": 2.98e-07, "loss": 3.5827, "step": 13338 }, { "epoch": 351.0, "eval_accuracy": 0.4186217008797654, "eval_loss": 4.014683723449707, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 13338 }, { "epoch": 352.0, "learning_rate": 2.9599999999999995e-07, "loss": 3.5747, "step": 13376 }, { "epoch": 352.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.014786720275879, "eval_runtime": 0.6143, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 13376 }, { "epoch": 353.0, "learning_rate": 2.9399999999999996e-07, "loss": 3.5745, "step": 13414 }, { "epoch": 353.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.016923904418945, "eval_runtime": 0.6148, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 13414 }, { "epoch": 354.0, "learning_rate": 2.9199999999999997e-07, "loss": 3.576, "step": 13452 }, { "epoch": 354.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.0161919593811035, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 13452 }, { "epoch": 355.0, "learning_rate": 2.9e-07, "loss": 3.5723, "step": 13490 }, { "epoch": 355.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.012264728546143, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 13490 }, { "epoch": 356.0, "learning_rate": 2.88e-07, "loss": 3.5669, "step": 13528 }, { "epoch": 356.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.014427185058594, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 13528 }, { "epoch": 357.0, "learning_rate": 2.8599999999999994e-07, "loss": 3.5721, "step": 13566 }, { "epoch": 357.0, "eval_accuracy": 0.41886608015640275, "eval_loss": 4.0136189460754395, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 13566 }, { "epoch": 358.0, "learning_rate": 2.8399999999999995e-07, "loss": 3.5725, "step": 13604 }, { "epoch": 358.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.01244592666626, "eval_runtime": 0.6144, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 13604 }, { "epoch": 359.0, "learning_rate": 2.8199999999999996e-07, "loss": 3.5627, "step": 13642 }, { "epoch": 359.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.012938976287842, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 13642 }, { "epoch": 360.0, "learning_rate": 2.8e-07, "loss": 3.5632, "step": 13680 }, { "epoch": 360.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.012718677520752, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.632, "step": 13680 }, { "epoch": 361.0, "learning_rate": 2.7800000000000003e-07, "loss": 3.5641, "step": 13718 }, { "epoch": 361.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.01040506362915, "eval_runtime": 0.6129, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 13718 }, { "epoch": 362.0, "learning_rate": 2.7600000000000004e-07, "loss": 3.5636, "step": 13756 }, { "epoch": 362.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.010016918182373, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 13756 }, { "epoch": 363.0, "learning_rate": 2.74e-07, "loss": 3.5566, "step": 13794 }, { "epoch": 363.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.01265811920166, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 13794 }, { "epoch": 364.0, "learning_rate": 2.72e-07, "loss": 3.5556, "step": 13832 }, { "epoch": 364.0, "eval_accuracy": 0.4198435972629521, "eval_loss": 4.013090133666992, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627, "step": 13832 }, { "epoch": 365.0, "learning_rate": 2.7e-07, "loss": 3.5606, "step": 13870 }, { "epoch": 365.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.01081657409668, "eval_runtime": 0.6265, "eval_samples_per_second": 6.385, "eval_steps_per_second": 1.596, "step": 13870 }, { "epoch": 366.0, "learning_rate": 2.68e-07, "loss": 3.5573, "step": 13908 }, { "epoch": 366.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.009543418884277, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 13908 }, { "epoch": 367.0, "learning_rate": 2.66e-07, "loss": 3.5603, "step": 13946 }, { "epoch": 367.0, "eval_accuracy": 0.4191104594330401, "eval_loss": 4.007948875427246, "eval_runtime": 0.6142, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 13946 }, { "epoch": 368.0, "learning_rate": 2.64e-07, "loss": 3.5552, "step": 13984 }, { "epoch": 368.0, "eval_accuracy": 0.4191104594330401, "eval_loss": 4.007278919219971, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 13984 }, { "epoch": 369.0, "learning_rate": 2.62e-07, "loss": 3.5594, "step": 14022 }, { "epoch": 369.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.007977485656738, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 14022 }, { "epoch": 370.0, "learning_rate": 2.6e-07, "loss": 3.5557, "step": 14060 }, { "epoch": 370.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.006712913513184, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 14060 }, { "epoch": 371.0, "learning_rate": 2.58e-07, "loss": 3.5523, "step": 14098 }, { "epoch": 371.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.006473541259766, "eval_runtime": 0.615, "eval_samples_per_second": 6.504, "eval_steps_per_second": 1.626, "step": 14098 }, { "epoch": 372.0, "learning_rate": 2.56e-07, "loss": 3.5516, "step": 14136 }, { "epoch": 372.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.007019519805908, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 14136 }, { "epoch": 373.0, "learning_rate": 2.5399999999999997e-07, "loss": 3.5466, "step": 14174 }, { "epoch": 373.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.007321834564209, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 14174 }, { "epoch": 374.0, "learning_rate": 2.52e-07, "loss": 3.5474, "step": 14212 }, { "epoch": 374.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.004045486450195, "eval_runtime": 0.6268, "eval_samples_per_second": 6.381, "eval_steps_per_second": 1.595, "step": 14212 }, { "epoch": 375.0, "learning_rate": 2.5e-07, "loss": 3.5481, "step": 14250 }, { "epoch": 375.0, "eval_accuracy": 0.4195992179863148, "eval_loss": 4.003184795379639, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 14250 }, { "epoch": 376.0, "learning_rate": 2.48e-07, "loss": 3.5496, "step": 14288 }, { "epoch": 376.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.00510311126709, "eval_runtime": 0.6153, "eval_samples_per_second": 6.501, "eval_steps_per_second": 1.625, "step": 14288 }, { "epoch": 377.0, "learning_rate": 2.46e-07, "loss": 3.5489, "step": 14326 }, { "epoch": 377.0, "eval_accuracy": 0.41935483870967744, "eval_loss": 4.003530502319336, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 14326 }, { "epoch": 378.0, "learning_rate": 2.4399999999999996e-07, "loss": 3.5439, "step": 14364 }, { "epoch": 378.0, "eval_accuracy": 0.4198435972629521, "eval_loss": 4.0032219886779785, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 14364 }, { "epoch": 379.0, "learning_rate": 2.4199999999999997e-07, "loss": 3.5464, "step": 14402 }, { "epoch": 379.0, "eval_accuracy": 0.42057673509286414, "eval_loss": 4.002893924713135, "eval_runtime": 0.6148, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 14402 }, { "epoch": 380.0, "learning_rate": 2.4e-07, "loss": 3.5455, "step": 14440 }, { "epoch": 380.0, "eval_accuracy": 0.4198435972629521, "eval_loss": 4.003747463226318, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 14440 }, { "epoch": 381.0, "learning_rate": 2.38e-07, "loss": 3.5439, "step": 14478 }, { "epoch": 381.0, "eval_accuracy": 0.42057673509286414, "eval_loss": 4.002392292022705, "eval_runtime": 0.6152, "eval_samples_per_second": 6.502, "eval_steps_per_second": 1.626, "step": 14478 }, { "epoch": 382.0, "learning_rate": 2.3599999999999997e-07, "loss": 3.542, "step": 14516 }, { "epoch": 382.0, "eval_accuracy": 0.4203323558162268, "eval_loss": 4.001096725463867, "eval_runtime": 0.6126, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.632, "step": 14516 }, { "epoch": 383.0, "learning_rate": 2.34e-07, "loss": 3.5366, "step": 14554 }, { "epoch": 383.0, "eval_accuracy": 0.4203323558162268, "eval_loss": 4.001129150390625, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 14554 }, { "epoch": 384.0, "learning_rate": 2.32e-07, "loss": 3.5368, "step": 14592 }, { "epoch": 384.0, "eval_accuracy": 0.42057673509286414, "eval_loss": 4.001524448394775, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 14592 }, { "epoch": 385.0, "learning_rate": 2.3e-07, "loss": 3.5382, "step": 14630 }, { "epoch": 385.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 4.0017523765563965, "eval_runtime": 0.6151, "eval_samples_per_second": 6.503, "eval_steps_per_second": 1.626, "step": 14630 }, { "epoch": 386.0, "learning_rate": 2.28e-07, "loss": 3.5358, "step": 14668 }, { "epoch": 386.0, "eval_accuracy": 0.42008797653958946, "eval_loss": 4.000154495239258, "eval_runtime": 0.6297, "eval_samples_per_second": 6.352, "eval_steps_per_second": 1.588, "step": 14668 }, { "epoch": 387.0, "learning_rate": 2.2599999999999999e-07, "loss": 3.5324, "step": 14706 }, { "epoch": 387.0, "eval_accuracy": 0.4198435972629521, "eval_loss": 3.9989571571350098, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 14706 }, { "epoch": 388.0, "learning_rate": 2.24e-07, "loss": 3.5378, "step": 14744 }, { "epoch": 388.0, "eval_accuracy": 0.42057673509286414, "eval_loss": 4.000235080718994, "eval_runtime": 0.6147, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 14744 }, { "epoch": 389.0, "learning_rate": 2.22e-07, "loss": 3.5334, "step": 14782 }, { "epoch": 389.0, "eval_accuracy": 0.4208211143695015, "eval_loss": 3.9985251426696777, "eval_runtime": 0.6145, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627, "step": 14782 }, { "epoch": 390.0, "learning_rate": 2.1999999999999998e-07, "loss": 3.5349, "step": 14820 }, { "epoch": 390.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.998689651489258, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 14820 }, { "epoch": 391.0, "learning_rate": 2.18e-07, "loss": 3.5378, "step": 14858 }, { "epoch": 391.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.9983861446380615, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 14858 }, { "epoch": 392.0, "learning_rate": 2.1599999999999998e-07, "loss": 3.5304, "step": 14896 }, { "epoch": 392.0, "eval_accuracy": 0.42057673509286414, "eval_loss": 3.9976606369018555, "eval_runtime": 0.6239, "eval_samples_per_second": 6.411, "eval_steps_per_second": 1.603, "step": 14896 }, { "epoch": 393.0, "learning_rate": 2.1399999999999998e-07, "loss": 3.5241, "step": 14934 }, { "epoch": 393.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9984891414642334, "eval_runtime": 0.6127, "eval_samples_per_second": 6.528, "eval_steps_per_second": 1.632, "step": 14934 }, { "epoch": 394.0, "learning_rate": 2.12e-07, "loss": 3.527, "step": 14972 }, { "epoch": 394.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.9997339248657227, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 14972 }, { "epoch": 395.0, "learning_rate": 2.0999999999999997e-07, "loss": 3.5261, "step": 15010 }, { "epoch": 395.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.9985299110412598, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 15010 }, { "epoch": 396.0, "learning_rate": 2.0799999999999998e-07, "loss": 3.5233, "step": 15048 }, { "epoch": 396.0, "eval_accuracy": 0.4215542521994135, "eval_loss": 3.9982762336730957, "eval_runtime": 0.6145, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627, "step": 15048 }, { "epoch": 397.0, "learning_rate": 2.06e-07, "loss": 3.5279, "step": 15086 }, { "epoch": 397.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9965884685516357, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 15086 }, { "epoch": 398.0, "learning_rate": 2.0399999999999997e-07, "loss": 3.5276, "step": 15124 }, { "epoch": 398.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.995763063430786, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 15124 }, { "epoch": 399.0, "learning_rate": 2.02e-07, "loss": 3.5214, "step": 15162 }, { "epoch": 399.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9957404136657715, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 15162 }, { "epoch": 400.0, "learning_rate": 2e-07, "loss": 3.5222, "step": 15200 }, { "epoch": 400.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.995762586593628, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 15200 }, { "epoch": 401.0, "learning_rate": 1.98e-07, "loss": 3.5163, "step": 15238 }, { "epoch": 401.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9957165718078613, "eval_runtime": 0.6146, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 15238 }, { "epoch": 402.0, "learning_rate": 1.96e-07, "loss": 3.5208, "step": 15276 }, { "epoch": 402.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.995258092880249, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 15276 }, { "epoch": 403.0, "learning_rate": 1.94e-07, "loss": 3.5168, "step": 15314 }, { "epoch": 403.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.994943380355835, "eval_runtime": 0.616, "eval_samples_per_second": 6.494, "eval_steps_per_second": 1.623, "step": 15314 }, { "epoch": 404.0, "learning_rate": 1.92e-07, "loss": 3.5242, "step": 15352 }, { "epoch": 404.0, "eval_accuracy": 0.4215542521994135, "eval_loss": 3.994105577468872, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 15352 }, { "epoch": 405.0, "learning_rate": 1.8999999999999998e-07, "loss": 3.5205, "step": 15390 }, { "epoch": 405.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.993699789047241, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 15390 }, { "epoch": 406.0, "learning_rate": 1.88e-07, "loss": 3.5158, "step": 15428 }, { "epoch": 406.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9949395656585693, "eval_runtime": 0.6145, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 15428 }, { "epoch": 407.0, "learning_rate": 1.86e-07, "loss": 3.517, "step": 15466 }, { "epoch": 407.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9939072132110596, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 15466 }, { "epoch": 408.0, "learning_rate": 1.8399999999999998e-07, "loss": 3.519, "step": 15504 }, { "epoch": 408.0, "eval_accuracy": 0.4215542521994135, "eval_loss": 3.9944329261779785, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 15504 }, { "epoch": 409.0, "learning_rate": 1.82e-07, "loss": 3.5164, "step": 15542 }, { "epoch": 409.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9929213523864746, "eval_runtime": 0.6175, "eval_samples_per_second": 6.478, "eval_steps_per_second": 1.619, "step": 15542 }, { "epoch": 410.0, "learning_rate": 1.8e-07, "loss": 3.5133, "step": 15580 }, { "epoch": 410.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.9925248622894287, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 15580 }, { "epoch": 411.0, "learning_rate": 1.7799999999999998e-07, "loss": 3.5199, "step": 15618 }, { "epoch": 411.0, "eval_accuracy": 0.4210654936461388, "eval_loss": 3.9905953407287598, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 15618 }, { "epoch": 412.0, "learning_rate": 1.76e-07, "loss": 3.5117, "step": 15656 }, { "epoch": 412.0, "eval_accuracy": 0.4215542521994135, "eval_loss": 3.9919614791870117, "eval_runtime": 0.6189, "eval_samples_per_second": 6.463, "eval_steps_per_second": 1.616, "step": 15656 }, { "epoch": 413.0, "learning_rate": 1.7399999999999997e-07, "loss": 3.5151, "step": 15694 }, { "epoch": 413.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9906229972839355, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 15694 }, { "epoch": 414.0, "learning_rate": 1.7199999999999998e-07, "loss": 3.5093, "step": 15732 }, { "epoch": 414.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9914052486419678, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 15732 }, { "epoch": 415.0, "learning_rate": 1.7000000000000001e-07, "loss": 3.512, "step": 15770 }, { "epoch": 415.0, "eval_accuracy": 0.4215542521994135, "eval_loss": 3.9908926486968994, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 15770 }, { "epoch": 416.0, "learning_rate": 1.68e-07, "loss": 3.5076, "step": 15808 }, { "epoch": 416.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9911580085754395, "eval_runtime": 0.6142, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 15808 }, { "epoch": 417.0, "learning_rate": 1.66e-07, "loss": 3.5059, "step": 15846 }, { "epoch": 417.0, "eval_accuracy": 0.4220430107526882, "eval_loss": 3.9916296005249023, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 15846 }, { "epoch": 418.0, "learning_rate": 1.64e-07, "loss": 3.5096, "step": 15884 }, { "epoch": 418.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.990671396255493, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 15884 }, { "epoch": 419.0, "learning_rate": 1.62e-07, "loss": 3.5038, "step": 15922 }, { "epoch": 419.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9902234077453613, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 15922 }, { "epoch": 420.0, "learning_rate": 1.6e-07, "loss": 3.5089, "step": 15960 }, { "epoch": 420.0, "eval_accuracy": 0.4215542521994135, "eval_loss": 3.989504814147949, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 15960 }, { "epoch": 421.0, "learning_rate": 1.5799999999999999e-07, "loss": 3.5091, "step": 15998 }, { "epoch": 421.0, "eval_accuracy": 0.42130987292277616, "eval_loss": 3.9893267154693604, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 15998 }, { "epoch": 422.0, "learning_rate": 1.56e-07, "loss": 3.5101, "step": 16036 }, { "epoch": 422.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9890270233154297, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 16036 }, { "epoch": 423.0, "learning_rate": 1.54e-07, "loss": 3.5061, "step": 16074 }, { "epoch": 423.0, "eval_accuracy": 0.4220430107526882, "eval_loss": 3.990032434463501, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 16074 }, { "epoch": 424.0, "learning_rate": 1.5199999999999998e-07, "loss": 3.5048, "step": 16112 }, { "epoch": 424.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9888319969177246, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 16112 }, { "epoch": 425.0, "learning_rate": 1.5e-07, "loss": 3.501, "step": 16150 }, { "epoch": 425.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.9880638122558594, "eval_runtime": 0.6256, "eval_samples_per_second": 6.394, "eval_steps_per_second": 1.599, "step": 16150 }, { "epoch": 426.0, "learning_rate": 1.4799999999999998e-07, "loss": 3.5067, "step": 16188 }, { "epoch": 426.0, "eval_accuracy": 0.42179863147605084, "eval_loss": 3.987746000289917, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 16188 }, { "epoch": 427.0, "learning_rate": 1.4599999999999998e-07, "loss": 3.5037, "step": 16226 }, { "epoch": 427.0, "eval_accuracy": 0.4222873900293255, "eval_loss": 3.986624002456665, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 16226 }, { "epoch": 428.0, "learning_rate": 1.44e-07, "loss": 3.5052, "step": 16264 }, { "epoch": 428.0, "eval_accuracy": 0.4222873900293255, "eval_loss": 3.985456943511963, "eval_runtime": 0.613, "eval_samples_per_second": 6.525, "eval_steps_per_second": 1.631, "step": 16264 }, { "epoch": 429.0, "learning_rate": 1.4199999999999997e-07, "loss": 3.5049, "step": 16302 }, { "epoch": 429.0, "eval_accuracy": 0.4222873900293255, "eval_loss": 3.9861788749694824, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 16302 }, { "epoch": 430.0, "learning_rate": 1.4e-07, "loss": 3.5017, "step": 16340 }, { "epoch": 430.0, "eval_accuracy": 0.4227761485826002, "eval_loss": 3.987318992614746, "eval_runtime": 0.6129, "eval_samples_per_second": 6.527, "eval_steps_per_second": 1.632, "step": 16340 }, { "epoch": 431.0, "learning_rate": 1.3800000000000002e-07, "loss": 3.5038, "step": 16378 }, { "epoch": 431.0, "eval_accuracy": 0.4227761485826002, "eval_loss": 3.9872233867645264, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 16378 }, { "epoch": 432.0, "learning_rate": 1.36e-07, "loss": 3.5072, "step": 16416 }, { "epoch": 432.0, "eval_accuracy": 0.42253176930596287, "eval_loss": 3.985309362411499, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 16416 }, { "epoch": 433.0, "learning_rate": 1.34e-07, "loss": 3.5009, "step": 16454 }, { "epoch": 433.0, "eval_accuracy": 0.42253176930596287, "eval_loss": 3.9849016666412354, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.628, "step": 16454 }, { "epoch": 434.0, "learning_rate": 1.32e-07, "loss": 3.5023, "step": 16492 }, { "epoch": 434.0, "eval_accuracy": 0.4227761485826002, "eval_loss": 3.9856038093566895, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 16492 }, { "epoch": 435.0, "learning_rate": 1.3e-07, "loss": 3.4982, "step": 16530 }, { "epoch": 435.0, "eval_accuracy": 0.4227761485826002, "eval_loss": 3.9859957695007324, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 16530 }, { "epoch": 436.0, "learning_rate": 1.28e-07, "loss": 3.4927, "step": 16568 }, { "epoch": 436.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.9858930110931396, "eval_runtime": 0.6145, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.627, "step": 16568 }, { "epoch": 437.0, "learning_rate": 1.26e-07, "loss": 3.4959, "step": 16606 }, { "epoch": 437.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.986088514328003, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 16606 }, { "epoch": 438.0, "learning_rate": 1.24e-07, "loss": 3.4984, "step": 16644 }, { "epoch": 438.0, "eval_accuracy": 0.4227761485826002, "eval_loss": 3.9860339164733887, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 16644 }, { "epoch": 439.0, "learning_rate": 1.2199999999999998e-07, "loss": 3.5005, "step": 16682 }, { "epoch": 439.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.9846749305725098, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 16682 }, { "epoch": 440.0, "learning_rate": 1.2e-07, "loss": 3.4947, "step": 16720 }, { "epoch": 440.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.9845149517059326, "eval_runtime": 0.6202, "eval_samples_per_second": 6.45, "eval_steps_per_second": 1.612, "step": 16720 }, { "epoch": 441.0, "learning_rate": 1.1799999999999998e-07, "loss": 3.4964, "step": 16758 }, { "epoch": 441.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.9842681884765625, "eval_runtime": 0.6165, "eval_samples_per_second": 6.488, "eval_steps_per_second": 1.622, "step": 16758 }, { "epoch": 442.0, "learning_rate": 1.16e-07, "loss": 3.4955, "step": 16796 }, { "epoch": 442.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.9844443798065186, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 16796 }, { "epoch": 443.0, "learning_rate": 1.14e-07, "loss": 3.4923, "step": 16834 }, { "epoch": 443.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.9843380451202393, "eval_runtime": 0.6148, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 16834 }, { "epoch": 444.0, "learning_rate": 1.12e-07, "loss": 3.4993, "step": 16872 }, { "epoch": 444.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.9841716289520264, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 16872 }, { "epoch": 445.0, "learning_rate": 1.0999999999999999e-07, "loss": 3.4889, "step": 16910 }, { "epoch": 445.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.9846384525299072, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 16910 }, { "epoch": 446.0, "learning_rate": 1.0799999999999999e-07, "loss": 3.487, "step": 16948 }, { "epoch": 446.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.98549485206604, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 16948 }, { "epoch": 447.0, "learning_rate": 1.06e-07, "loss": 3.4965, "step": 16986 }, { "epoch": 447.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.985051155090332, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 16986 }, { "epoch": 448.0, "learning_rate": 1.0399999999999999e-07, "loss": 3.4873, "step": 17024 }, { "epoch": 448.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.9851512908935547, "eval_runtime": 0.6232, "eval_samples_per_second": 6.419, "eval_steps_per_second": 1.605, "step": 17024 }, { "epoch": 449.0, "learning_rate": 1.0199999999999999e-07, "loss": 3.4936, "step": 17062 }, { "epoch": 449.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.984747886657715, "eval_runtime": 0.6132, "eval_samples_per_second": 6.523, "eval_steps_per_second": 1.631, "step": 17062 }, { "epoch": 450.0, "learning_rate": 1e-07, "loss": 3.494, "step": 17100 }, { "epoch": 450.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.984121084213257, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 17100 }, { "epoch": 451.0, "learning_rate": 9.8e-08, "loss": 3.4855, "step": 17138 }, { "epoch": 451.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.9835801124572754, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 17138 }, { "epoch": 452.0, "learning_rate": 9.6e-08, "loss": 3.4898, "step": 17176 }, { "epoch": 452.0, "eval_accuracy": 0.42302052785923755, "eval_loss": 3.983008623123169, "eval_runtime": 0.6136, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 17176 }, { "epoch": 453.0, "learning_rate": 9.4e-08, "loss": 3.4866, "step": 17214 }, { "epoch": 453.0, "eval_accuracy": 0.4232649071358749, "eval_loss": 3.9831044673919678, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 17214 }, { "epoch": 454.0, "learning_rate": 9.199999999999999e-08, "loss": 3.4866, "step": 17252 }, { "epoch": 454.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.983053207397461, "eval_runtime": 0.6145, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 17252 }, { "epoch": 455.0, "learning_rate": 9e-08, "loss": 3.4886, "step": 17290 }, { "epoch": 455.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9836299419403076, "eval_runtime": 0.6142, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 17290 }, { "epoch": 456.0, "learning_rate": 8.8e-08, "loss": 3.4874, "step": 17328 }, { "epoch": 456.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.983760118484497, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 17328 }, { "epoch": 457.0, "learning_rate": 8.599999999999999e-08, "loss": 3.486, "step": 17366 }, { "epoch": 457.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.983823776245117, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 17366 }, { "epoch": 458.0, "learning_rate": 8.4e-08, "loss": 3.4869, "step": 17404 }, { "epoch": 458.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.983541488647461, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 17404 }, { "epoch": 459.0, "learning_rate": 8.2e-08, "loss": 3.4845, "step": 17442 }, { "epoch": 459.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9833405017852783, "eval_runtime": 0.6146, "eval_samples_per_second": 6.508, "eval_steps_per_second": 1.627, "step": 17442 }, { "epoch": 460.0, "learning_rate": 8e-08, "loss": 3.4849, "step": 17480 }, { "epoch": 460.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9825379848480225, "eval_runtime": 0.6139, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 17480 }, { "epoch": 461.0, "learning_rate": 7.8e-08, "loss": 3.4841, "step": 17518 }, { "epoch": 461.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.981783628463745, "eval_runtime": 0.6149, "eval_samples_per_second": 6.505, "eval_steps_per_second": 1.626, "step": 17518 }, { "epoch": 462.0, "learning_rate": 7.599999999999999e-08, "loss": 3.4924, "step": 17556 }, { "epoch": 462.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.9813952445983887, "eval_runtime": 0.6143, "eval_samples_per_second": 6.512, "eval_steps_per_second": 1.628, "step": 17556 }, { "epoch": 463.0, "learning_rate": 7.399999999999999e-08, "loss": 3.571, "step": 17594 }, { "epoch": 463.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.981501579284668, "eval_runtime": 0.6142, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 17594 }, { "epoch": 464.0, "learning_rate": 7.2e-08, "loss": 3.4811, "step": 17632 }, { "epoch": 464.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.981280565261841, "eval_runtime": 0.6153, "eval_samples_per_second": 6.501, "eval_steps_per_second": 1.625, "step": 17632 }, { "epoch": 465.0, "learning_rate": 7e-08, "loss": 3.4851, "step": 17670 }, { "epoch": 465.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.9809834957122803, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 17670 }, { "epoch": 466.0, "learning_rate": 6.8e-08, "loss": 3.4776, "step": 17708 }, { "epoch": 466.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9812755584716797, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 17708 }, { "epoch": 467.0, "learning_rate": 6.6e-08, "loss": 3.4849, "step": 17746 }, { "epoch": 467.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.981030225753784, "eval_runtime": 0.6141, "eval_samples_per_second": 6.514, "eval_steps_per_second": 1.629, "step": 17746 }, { "epoch": 468.0, "learning_rate": 6.4e-08, "loss": 3.4766, "step": 17784 }, { "epoch": 468.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9813342094421387, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 17784 }, { "epoch": 469.0, "learning_rate": 6.2e-08, "loss": 3.4791, "step": 17822 }, { "epoch": 469.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.981501817703247, "eval_runtime": 0.6135, "eval_samples_per_second": 6.52, "eval_steps_per_second": 1.63, "step": 17822 }, { "epoch": 470.0, "learning_rate": 6e-08, "loss": 3.4814, "step": 17860 }, { "epoch": 470.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.98130464553833, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.627, "step": 17860 }, { "epoch": 471.0, "learning_rate": 5.8e-08, "loss": 3.4861, "step": 17898 }, { "epoch": 471.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.980907917022705, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 17898 }, { "epoch": 472.0, "learning_rate": 5.6e-08, "loss": 3.4861, "step": 17936 }, { "epoch": 472.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.9806013107299805, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.631, "step": 17936 }, { "epoch": 473.0, "learning_rate": 5.3999999999999994e-08, "loss": 3.4825, "step": 17974 }, { "epoch": 473.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.9808974266052246, "eval_runtime": 0.6137, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 17974 }, { "epoch": 474.0, "learning_rate": 5.1999999999999996e-08, "loss": 3.4758, "step": 18012 }, { "epoch": 474.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.98111629486084, "eval_runtime": 0.6144, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 18012 }, { "epoch": 475.0, "learning_rate": 5e-08, "loss": 3.4811, "step": 18050 }, { "epoch": 475.0, "eval_accuracy": 0.42350928641251223, "eval_loss": 3.980703592300415, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.629, "step": 18050 }, { "epoch": 476.0, "learning_rate": 4.8e-08, "loss": 3.4831, "step": 18088 }, { "epoch": 476.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.980832815170288, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 18088 }, { "epoch": 477.0, "learning_rate": 4.5999999999999995e-08, "loss": 3.4837, "step": 18126 }, { "epoch": 477.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.980334758758545, "eval_runtime": 0.613, "eval_samples_per_second": 6.526, "eval_steps_per_second": 1.631, "step": 18126 }, { "epoch": 478.0, "learning_rate": 4.4e-08, "loss": 3.4843, "step": 18164 }, { "epoch": 478.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9802615642547607, "eval_runtime": 0.6137, "eval_samples_per_second": 6.518, "eval_steps_per_second": 1.63, "step": 18164 }, { "epoch": 479.0, "learning_rate": 4.2e-08, "loss": 3.4825, "step": 18202 }, { "epoch": 479.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9801557064056396, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 18202 }, { "epoch": 480.0, "learning_rate": 4e-08, "loss": 3.4807, "step": 18240 }, { "epoch": 480.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.979966163635254, "eval_runtime": 0.6189, "eval_samples_per_second": 6.463, "eval_steps_per_second": 1.616, "step": 18240 }, { "epoch": 481.0, "learning_rate": 3.7999999999999996e-08, "loss": 3.4808, "step": 18278 }, { "epoch": 481.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9796664714813232, "eval_runtime": 0.618, "eval_samples_per_second": 6.473, "eval_steps_per_second": 1.618, "step": 18278 }, { "epoch": 482.0, "learning_rate": 3.6e-08, "loss": 3.4805, "step": 18316 }, { "epoch": 482.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9796643257141113, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 18316 }, { "epoch": 483.0, "learning_rate": 3.4e-08, "loss": 3.4818, "step": 18354 }, { "epoch": 483.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.979565382003784, "eval_runtime": 0.6125, "eval_samples_per_second": 6.531, "eval_steps_per_second": 1.633, "step": 18354 }, { "epoch": 484.0, "learning_rate": 3.2e-08, "loss": 3.4821, "step": 18392 }, { "epoch": 484.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9793689250946045, "eval_runtime": 0.6138, "eval_samples_per_second": 6.517, "eval_steps_per_second": 1.629, "step": 18392 }, { "epoch": 485.0, "learning_rate": 3e-08, "loss": 3.4802, "step": 18430 }, { "epoch": 485.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.979444980621338, "eval_runtime": 0.6144, "eval_samples_per_second": 6.51, "eval_steps_per_second": 1.628, "step": 18430 }, { "epoch": 486.0, "learning_rate": 2.8e-08, "loss": 3.4805, "step": 18468 }, { "epoch": 486.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.979588508605957, "eval_runtime": 0.6134, "eval_samples_per_second": 6.521, "eval_steps_per_second": 1.63, "step": 18468 }, { "epoch": 487.0, "learning_rate": 2.5999999999999998e-08, "loss": 3.4831, "step": 18506 }, { "epoch": 487.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9796085357666016, "eval_runtime": 0.6257, "eval_samples_per_second": 6.393, "eval_steps_per_second": 1.598, "step": 18506 }, { "epoch": 488.0, "learning_rate": 2.4e-08, "loss": 3.4846, "step": 18544 }, { "epoch": 488.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.97976016998291, "eval_runtime": 0.6147, "eval_samples_per_second": 6.507, "eval_steps_per_second": 1.627, "step": 18544 }, { "epoch": 489.0, "learning_rate": 2.2e-08, "loss": 3.4824, "step": 18582 }, { "epoch": 489.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9797983169555664, "eval_runtime": 0.6255, "eval_samples_per_second": 6.395, "eval_steps_per_second": 1.599, "step": 18582 }, { "epoch": 490.0, "learning_rate": 2e-08, "loss": 3.4807, "step": 18620 }, { "epoch": 490.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9798743724823, "eval_runtime": 0.6145, "eval_samples_per_second": 6.509, "eval_steps_per_second": 1.627, "step": 18620 }, { "epoch": 491.0, "learning_rate": 1.8e-08, "loss": 3.4809, "step": 18658 }, { "epoch": 491.0, "eval_accuracy": 0.4239980449657869, "eval_loss": 3.9799368381500244, "eval_runtime": 0.6143, "eval_samples_per_second": 6.511, "eval_steps_per_second": 1.628, "step": 18658 }, { "epoch": 492.0, "learning_rate": 1.6e-08, "loss": 3.4801, "step": 18696 }, { "epoch": 492.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9799206256866455, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 18696 }, { "epoch": 493.0, "learning_rate": 1.4e-08, "loss": 3.479, "step": 18734 }, { "epoch": 493.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9799153804779053, "eval_runtime": 0.6136, "eval_samples_per_second": 6.519, "eval_steps_per_second": 1.63, "step": 18734 }, { "epoch": 494.0, "learning_rate": 1.2e-08, "loss": 3.48, "step": 18772 }, { "epoch": 494.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9799201488494873, "eval_runtime": 0.6126, "eval_samples_per_second": 6.529, "eval_steps_per_second": 1.632, "step": 18772 }, { "epoch": 495.0, "learning_rate": 1e-08, "loss": 3.4828, "step": 18810 }, { "epoch": 495.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9799094200134277, "eval_runtime": 0.6243, "eval_samples_per_second": 6.407, "eval_steps_per_second": 1.602, "step": 18810 }, { "epoch": 496.0, "learning_rate": 8e-09, "loss": 3.4812, "step": 18848 }, { "epoch": 496.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.979907512664795, "eval_runtime": 0.6138, "eval_samples_per_second": 6.516, "eval_steps_per_second": 1.629, "step": 18848 }, { "epoch": 497.0, "learning_rate": 6e-09, "loss": 3.4798, "step": 18886 }, { "epoch": 497.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.9798967838287354, "eval_runtime": 0.6148, "eval_samples_per_second": 6.506, "eval_steps_per_second": 1.626, "step": 18886 }, { "epoch": 498.0, "learning_rate": 4e-09, "loss": 3.4866, "step": 18924 }, { "epoch": 498.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.979888677597046, "eval_runtime": 0.614, "eval_samples_per_second": 6.515, "eval_steps_per_second": 1.629, "step": 18924 }, { "epoch": 499.0, "learning_rate": 2e-09, "loss": 3.4785, "step": 18962 }, { "epoch": 499.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.979886054992676, "eval_runtime": 0.6141, "eval_samples_per_second": 6.513, "eval_steps_per_second": 1.628, "step": 18962 }, { "epoch": 500.0, "learning_rate": 0.0, "loss": 3.4893, "step": 19000 }, { "epoch": 500.0, "eval_accuracy": 0.4237536656891496, "eval_loss": 3.979886293411255, "eval_runtime": 0.6133, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.63, "step": 19000 }, { "epoch": 500.0, "step": 19000, "total_flos": 8.26946617344e+16, "train_loss": 3.948820646587171, "train_runtime": 16522.6397, "train_samples_per_second": 1.15, "train_steps_per_second": 1.15 } ], "logging_steps": 500, "max_steps": 19000, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 500, "total_flos": 8.26946617344e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }